1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/irq_work.h>
23 #include <linux/debugfs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/rwsem.h>
36 #include <linux/slab.h>
37 #include <linux/ctype.h>
38 #include <linux/init.h>
39 #include <linux/poll.h>
40 #include <linux/nmi.h>
41 #include <linux/fs.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 int ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
55  * A selftest will peek into the ring buffer to count the
56  * entries inserted during the selftest, although some concurrent
57  * insertions into the ring buffer, such as those from trace_printk,
58  * could occur at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * When a reader is waiting for data, this variable is
90  * set to true.
91  */
92 static bool trace_wakeup_needed;
93
94 static struct irq_work trace_work_wakeup;
95
96 /*
97  * Kill all tracing for good (never come back).
98  * It is initialized to 1 but will turn to zero if the initialization
99  * of the tracer is successful. But that is the only place that sets
100  * this back to zero.
101  */
102 static int tracing_disabled = 1;
103
104 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
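/*
 * Example (a rough, hypothetical sketch): besides the command line and
 * sysctl knobs described above, code that is about to report a serious
 * failure can dump the trace buffers directly with ftrace_dump().  The
 * example_fail() name is made up for illustration only.
 */
#if 0
static void example_fail(void)
{
        /* Dump every CPU's buffer to the console before giving up. */
        ftrace_dump(DUMP_ALL);
        BUG();
}
#endif
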
126 static int tracing_set_tracer(const char *buf);
127
128 #define MAX_TRACER_SIZE         100
129 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130 static char *default_bootup_tracer;
131
132 static int __init set_cmdline_ftrace(char *str)
133 {
134         strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135         default_bootup_tracer = bootup_tracer_buf;
136         /* We are using ftrace early, expand it */
137         ring_buffer_expanded = 1;
138         return 1;
139 }
140 __setup("ftrace=", set_cmdline_ftrace);
141
142 static int __init set_ftrace_dump_on_oops(char *str)
143 {
144         if (*str++ != '=' || !*str) {
145                 ftrace_dump_on_oops = DUMP_ALL;
146                 return 1;
147         }
148
149         if (!strcmp("orig_cpu", str)) {
150                 ftrace_dump_on_oops = DUMP_ORIG;
151                 return 1;
152         }
153
154         return 0;
155 }
156 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
157
158
159 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
160 static char *trace_boot_options __initdata;
161
162 static int __init set_trace_boot_options(char *str)
163 {
164         strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
165         trace_boot_options = trace_boot_options_buf;
166         return 0;
167 }
168 __setup("trace_options=", set_trace_boot_options);
169
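/*
 * Example (informational sketch): the __setup() hooks above are parsed at
 * boot, so a command line such as
 *
 *      ftrace=function trace_options=stacktrace ftrace_dump_on_oops
 *
 * would select the "function" tracer, turn on the "stacktrace" option and
 * arm dump-on-oops, assuming those tracers/options are built into the
 * kernel.
 */
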
170 unsigned long long ns2usecs(cycle_t nsec)
171 {
172         nsec += 500;
173         do_div(nsec, 1000);
174         return nsec;
175 }
176
177 /*
178  * The global_trace is the descriptor that holds the tracing
179  * buffers for the live tracing. For each CPU, it contains
180  * a linked list of pages that will store trace entries. The
181  * page descriptors of the pages in memory are used to hold the
182  * linked list: the lru item in each page descriptor links the
183  * pages of a CPU's buffer together.
184  *
185  * For each active CPU there is a data field that holds the
186  * pages for the buffer for that CPU. Each CPU has the same number
187  * of pages allocated for its buffer.
188  */
189 static struct trace_array       global_trace;
190
191 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
192
193 int filter_current_check_discard(struct ring_buffer *buffer,
194                                  struct ftrace_event_call *call, void *rec,
195                                  struct ring_buffer_event *event)
196 {
197         return filter_check_discard(call, rec, buffer, event);
198 }
199 EXPORT_SYMBOL_GPL(filter_current_check_discard);
200
201 cycle_t ftrace_now(int cpu)
202 {
203         u64 ts;
204
205         /* Early boot up does not have a buffer yet */
206         if (!global_trace.buffer)
207                 return trace_clock_local();
208
209         ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
210         ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
211
212         return ts;
213 }
214
215 /*
216  * The max_tr is used to snapshot the global_trace when a maximum
217  * latency is reached. Some tracers will use this to store a maximum
218  * trace while it continues examining live traces.
219  *
220  * The buffers for the max_tr are set up the same as the global_trace.
221  * When a snapshot is taken, the linked list of the max_tr is swapped
222  * with the linked list of the global_trace and the buffers are reset for
223  * the global_trace so the tracing can continue.
224  */
225 static struct trace_array       max_tr;
226
227 static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
228
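/*
 * Example (a rough, hypothetical sketch, assuming CONFIG_TRACER_MAX_TRACE):
 * how a latency tracer might consume max_tr.  When a latency larger than
 * the current maximum is seen, it updates tracing_max_latency and snapshots
 * the live buffers with update_max_tr().  The example_* names are made up;
 * the real logic lives in the irqsoff/wakeup tracers.
 */
#if 0
static void example_check_latency(struct trace_array *tr,
                                  struct trace_array_cpu *data, int cpu)
{
        u64 delta = ftrace_now(cpu) - data->preempt_timestamp;

        if (delta > tracing_max_latency) {
                tracing_max_latency = delta;
                update_max_tr(tr, current, cpu);        /* take the snapshot */
        }
}
#endif
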
229 int tracing_is_enabled(void)
230 {
231         return tracing_is_on();
232 }
233
234 /*
235  * trace_buf_size is the size in bytes that is allocated
236  * for a buffer. Note, the number of bytes is always rounded
237  * to page size.
238  *
239  * This number is purposely set to a low number of 16384.
240  * If the dump on oops happens, it is much appreciated not to
241  * have to wait for all that output. Anyway, this is configurable
242  * at both boot time and run time.
243  */
244 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
245
246 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
247
248 /* trace_types holds a link list of available tracers. */
249 static struct tracer            *trace_types __read_mostly;
250
251 /* current_trace points to the tracer that is currently active */
252 static struct tracer            *current_trace __read_mostly;
253
254 /*
255  * trace_types_lock is used to protect the trace_types list.
256  */
257 static DEFINE_MUTEX(trace_types_lock);
258
259 /*
260  * serialize the access of the ring buffer
261  *
262  * The ring buffer serializes readers, but that is only low level protection.
263  * The validity of the events (returned by ring_buffer_peek(), etc.)
264  * is not protected by the ring buffer.
265  *
266  * The content of events may become garbage if we allow another process to
267  * consume these events concurrently:
268  *   A) the page of the consumed events may become a normal page
269  *      (not a reader page) in the ring buffer, and this page will be
270  *      rewritten by the event producer.
271  *   B) the page of the consumed events may become a page for splice_read,
272  *      and this page will be returned to the system.
273  *
274  * These primitives allow multiple processes to access different per-cpu
275  * ring buffers concurrently.
276  *
277  * These primitives don't distinguish read-only and read-consume access.
278  * Multiple read-only accesses are also serialized.
279  */
280
281 #ifdef CONFIG_SMP
282 static DECLARE_RWSEM(all_cpu_access_lock);
283 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
284
285 static inline void trace_access_lock(int cpu)
286 {
287         if (cpu == TRACE_PIPE_ALL_CPU) {
288                 /* gain it for accessing the whole ring buffer. */
289                 down_write(&all_cpu_access_lock);
290         } else {
291                 /* gain it for accessing a cpu ring buffer. */
292
293                 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
294                 down_read(&all_cpu_access_lock);
295
296                 /* Secondly block other access to this @cpu ring buffer. */
297                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
298         }
299 }
300
301 static inline void trace_access_unlock(int cpu)
302 {
303         if (cpu == TRACE_PIPE_ALL_CPU) {
304                 up_write(&all_cpu_access_lock);
305         } else {
306                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
307                 up_read(&all_cpu_access_lock);
308         }
309 }
310
311 static inline void trace_access_lock_init(void)
312 {
313         int cpu;
314
315         for_each_possible_cpu(cpu)
316                 mutex_init(&per_cpu(cpu_access_lock, cpu));
317 }
318
319 #else
320
321 static DEFINE_MUTEX(access_lock);
322
323 static inline void trace_access_lock(int cpu)
324 {
325         (void)cpu;
326         mutex_lock(&access_lock);
327 }
328
329 static inline void trace_access_unlock(int cpu)
330 {
331         (void)cpu;
332         mutex_unlock(&access_lock);
333 }
334
335 static inline void trace_access_lock_init(void)
336 {
337 }
338
339 #endif
340
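/*
 * Example (a rough, hypothetical sketch): how a reader is expected to
 * bracket ring buffer accesses with the primitives above.  The
 * example_read_cpu() name is made up; the real users are the trace_pipe
 * read/splice paths later in this file.
 */
#if 0
static void example_read_cpu(struct trace_array *tr, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        trace_access_lock(cpu);
        event = ring_buffer_consume(tr->buffer, cpu, &ts, NULL);
        if (event) {
                /* event data is only stable while the lock is held */
        }
        trace_access_unlock(cpu);
}
#endif
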
341 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
342 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
343
344 /* trace_flags holds trace_options default values */
345 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
346         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
347         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
348         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
349
350 static int trace_stop_count;
351 static DEFINE_RAW_SPINLOCK(tracing_start_lock);
352
353 /**
354  * trace_wake_up - wake up tasks waiting for trace input
355  *
356  * Wakes up any task that is blocked on the trace_wait queue.
357  * This is used with trace_poll for tasks that are polling the
358  * trace.
359  */
360 static void trace_wake_up(struct irq_work *work)
361 {
362         wake_up_all(&trace_wait);
363
364 }
365
366 /**
367  * tracing_on - enable tracing buffers
368  *
369  * This function enables tracing buffers that may have been
370  * disabled with tracing_off.
371  */
372 void tracing_on(void)
373 {
374         if (global_trace.buffer)
375                 ring_buffer_record_on(global_trace.buffer);
376         /*
377          * This flag is only looked at when buffers haven't been
378          * allocated yet. We don't really care about the race
379          * between setting this flag and actually turning
380          * on the buffer.
381          */
382         global_trace.buffer_disabled = 0;
383 }
384 EXPORT_SYMBOL_GPL(tracing_on);
385
386 /**
387  * tracing_off - turn off tracing buffers
388  *
389  * This function stops the tracing buffers from recording data.
390  * It does not disable any overhead the tracers themselves may
391  * be causing. This function simply causes all recording to
392  * the ring buffers to fail.
393  */
394 void tracing_off(void)
395 {
396         if (global_trace.buffer)
397                 ring_buffer_record_off(global_trace.buffer);
398         /*
399          * This flag is only looked at when buffers haven't been
400          * allocated yet. We don't really care about the race
401          * between setting this flag and actually turning
402          * on the buffer.
403          */
404         global_trace.buffer_disabled = 1;
405 }
406 EXPORT_SYMBOL_GPL(tracing_off);
407
408 /**
409  * tracing_is_on - show state of ring buffers enabled
410  */
411 int tracing_is_on(void)
412 {
413         if (global_trace.buffer)
414                 return ring_buffer_record_is_on(global_trace.buffer);
415         return !global_trace.buffer_disabled;
416 }
417 EXPORT_SYMBOL_GPL(tracing_is_on);
418
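/*
 * Example (a rough, hypothetical sketch): a common debugging pattern is to
 * freeze the ring buffer the moment a suspicious condition is seen, so the
 * events leading up to it are preserved.  example_cond and example_check()
 * are made-up names.
 */
#if 0
static void example_check(int example_cond)
{
        if (WARN_ON_ONCE(example_cond)) {
                trace_printk("example condition hit, freezing trace\n");
                tracing_off();
        }
}
#endif
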
419 static int __init set_buf_size(char *str)
420 {
421         unsigned long buf_size;
422
423         if (!str)
424                 return 0;
425         buf_size = memparse(str, &str);
426         /* nr_entries can not be zero */
427         if (buf_size == 0)
428                 return 0;
429         trace_buf_size = buf_size;
430         return 1;
431 }
432 __setup("trace_buf_size=", set_buf_size);
433
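/*
 * Note (informational): memparse() understands K/M/G suffixes, so for
 * example "trace_buf_size=10M" on the command line requests roughly ten
 * megabytes of ring buffer (rounded to page size) instead of the small
 * TRACE_BUF_SIZE_DEFAULT above.
 */
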
434 static int __init set_tracing_thresh(char *str)
435 {
436         unsigned long threshold;
437         int ret;
438
439         if (!str)
440                 return 0;
441         ret = kstrtoul(str, 0, &threshold);
442         if (ret < 0)
443                 return 0;
444         tracing_thresh = threshold * 1000;
445         return 1;
446 }
447 __setup("tracing_thresh=", set_tracing_thresh);
448
449 unsigned long nsecs_to_usecs(unsigned long nsecs)
450 {
451         return nsecs / 1000;
452 }
453
454 /* These must match the bit positions in trace_iterator_flags */
455 static const char *trace_options[] = {
456         "print-parent",
457         "sym-offset",
458         "sym-addr",
459         "verbose",
460         "raw",
461         "hex",
462         "bin",
463         "block",
464         "stacktrace",
465         "trace_printk",
466         "ftrace_preempt",
467         "branch",
468         "annotate",
469         "userstacktrace",
470         "sym-userobj",
471         "printk-msg-only",
472         "context-info",
473         "latency-format",
474         "sleep-time",
475         "graph-time",
476         "record-cmd",
477         "overwrite",
478         "disable_on_free",
479         "irq-info",
480         "markers",
481         NULL
482 };
483
484 static struct {
485         u64 (*func)(void);
486         const char *name;
487         int in_ns;              /* is this clock in nanoseconds? */
488 } trace_clocks[] = {
489         { trace_clock_local,    "local",        1 },
490         { trace_clock_global,   "global",       1 },
491         { trace_clock_counter,  "counter",      0 },
492         ARCH_TRACE_CLOCKS
493 };
494
495 int trace_clock_id;
496
497 /*
498  * trace_parser_get_init - gets the buffer for trace parser
499  */
500 int trace_parser_get_init(struct trace_parser *parser, int size)
501 {
502         memset(parser, 0, sizeof(*parser));
503
504         parser->buffer = kmalloc(size, GFP_KERNEL);
505         if (!parser->buffer)
506                 return 1;
507
508         parser->size = size;
509         return 0;
510 }
511
512 /*
513  * trace_parser_put - frees the buffer for trace parser
514  */
515 void trace_parser_put(struct trace_parser *parser)
516 {
517         kfree(parser->buffer);
518 }
519
520 /*
521  * trace_get_user - reads the user input string separated by space
522  * (matched by isspace(ch))
523  *
524  * For each string found the 'struct trace_parser' is updated,
525  * and the function returns.
526  *
527  * Returns number of bytes read.
528  *
529  * See kernel/trace/trace.h for 'struct trace_parser' details.
530  */
531 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
532         size_t cnt, loff_t *ppos)
533 {
534         char ch;
535         size_t read = 0;
536         ssize_t ret;
537
538         if (!*ppos)
539                 trace_parser_clear(parser);
540
541         ret = get_user(ch, ubuf++);
542         if (ret)
543                 goto out;
544
545         read++;
546         cnt--;
547
548         /*
549          * The parser is not finished with the last write,
550          * continue reading the user input without skipping spaces.
551          */
552         if (!parser->cont) {
553                 /* skip white space */
554                 while (cnt && isspace(ch)) {
555                         ret = get_user(ch, ubuf++);
556                         if (ret)
557                                 goto out;
558                         read++;
559                         cnt--;
560                 }
561
562                 /* only spaces were written */
563                 if (isspace(ch)) {
564                         *ppos += read;
565                         ret = read;
566                         goto out;
567                 }
568
569                 parser->idx = 0;
570         }
571
572         /* read the non-space input */
573         while (cnt && !isspace(ch)) {
574                 if (parser->idx < parser->size - 1)
575                         parser->buffer[parser->idx++] = ch;
576                 else {
577                         ret = -EINVAL;
578                         goto out;
579                 }
580                 ret = get_user(ch, ubuf++);
581                 if (ret)
582                         goto out;
583                 read++;
584                 cnt--;
585         }
586
587         /* We either got finished input or we have to wait for another call. */
588         if (isspace(ch)) {
589                 parser->buffer[parser->idx] = 0;
590                 parser->cont = false;
591         } else {
592                 parser->cont = true;
593                 parser->buffer[parser->idx++] = ch;
594         }
595
596         *ppos += read;
597         ret = read;
598
599 out:
600         return ret;
601 }
602
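/*
 * Example (a rough, hypothetical sketch): how a debugfs write handler can
 * use the parser helpers above to pull one whitespace-separated word out of
 * user input.  example_write() is a made-up name; real users include the
 * set_ftrace_filter style files.
 */
#if 0
static ssize_t example_write(struct file *filp, const char __user *ubuf,
                             size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);
        if (read >= 0 && trace_parser_loaded(&parser)) {
                /* a complete word is now in parser.buffer */
        }

        trace_parser_put(&parser);
        return read;
}
#endif
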
603 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
604 {
605         int len;
606         int ret;
607
608         if (!cnt)
609                 return 0;
610
611         if (s->len <= s->readpos)
612                 return -EBUSY;
613
614         len = s->len - s->readpos;
615         if (cnt > len)
616                 cnt = len;
617         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
618         if (ret == cnt)
619                 return -EFAULT;
620
621         cnt -= ret;
622
623         s->readpos += cnt;
624         return cnt;
625 }
626
627 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
628 {
629         int len;
630
631         if (s->len <= s->readpos)
632                 return -EBUSY;
633
634         len = s->len - s->readpos;
635         if (cnt > len)
636                 cnt = len;
637         memcpy(buf, s->buffer + s->readpos, cnt);
638
639         s->readpos += cnt;
640         return cnt;
641 }
642
643 /*
644  * ftrace_max_lock is used to protect the swapping of buffers
645  * when taking a max snapshot. The buffers themselves are
646  * protected by per_cpu spinlocks. But the action of the swap
647  * needs its own lock.
648  *
649  * This is defined as an arch_spinlock_t in order to help
650  * with performance when lockdep debugging is enabled.
651  *
652  * It is also used in other places outside of update_max_tr(),
653  * so it needs to be defined outside of the
654  * CONFIG_TRACER_MAX_TRACE #ifdef.
655  */
656 static arch_spinlock_t ftrace_max_lock =
657         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
658
659 unsigned long __read_mostly     tracing_thresh;
660
661 #ifdef CONFIG_TRACER_MAX_TRACE
662 unsigned long __read_mostly     tracing_max_latency;
663
664 /*
665  * Copy the new maximum trace into the separate maximum-trace
666  * structure. (this way the maximum trace is permanently saved,
667  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
668  */
669 static void
670 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
671 {
672         struct trace_array_cpu *data = tr->data[cpu];
673         struct trace_array_cpu *max_data;
674
675         max_tr.cpu = cpu;
676         max_tr.time_start = data->preempt_timestamp;
677
678         max_data = max_tr.data[cpu];
679         max_data->saved_latency = tracing_max_latency;
680         max_data->critical_start = data->critical_start;
681         max_data->critical_end = data->critical_end;
682
683         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
684         max_data->pid = tsk->pid;
685         max_data->uid = task_uid(tsk);
686         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
687         max_data->policy = tsk->policy;
688         max_data->rt_priority = tsk->rt_priority;
689
690         /* record this tasks comm */
691         tracing_record_cmdline(tsk);
692 }
693
694 /**
695  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
696  * @tr: tracer
697  * @tsk: the task with the latency
698  * @cpu: The cpu that initiated the trace.
699  *
700  * Flip the buffers between the @tr and the max_tr and record information
701  * about which task was the cause of this latency.
702  */
703 void
704 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
705 {
706         struct ring_buffer *buf = tr->buffer;
707
708         if (trace_stop_count)
709                 return;
710
711         WARN_ON_ONCE(!irqs_disabled());
712
713         /* If we disabled the tracer, stop now */
714         if (current_trace == &nop_trace)
715                 return;
716
717         if (WARN_ON_ONCE(!current_trace->use_max_tr))
718                 return;
719
720         arch_spin_lock(&ftrace_max_lock);
721
722         tr->buffer = max_tr.buffer;
723         max_tr.buffer = buf;
724
725         __update_max_tr(tr, tsk, cpu);
726         arch_spin_unlock(&ftrace_max_lock);
727 }
728
729 /**
730  * update_max_tr_single - only copy one trace over, and reset the rest
731  * @tr: tracer
732  * @tsk: task with the latency
733  * @cpu: the cpu of the buffer to copy.
734  *
735  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
736  */
737 void
738 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
739 {
740         int ret;
741
742         if (trace_stop_count)
743                 return;
744
745         WARN_ON_ONCE(!irqs_disabled());
746         if (!current_trace->use_max_tr) {
747                 WARN_ON_ONCE(1);
748                 return;
749         }
750
751         arch_spin_lock(&ftrace_max_lock);
752
753         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
754
755         if (ret == -EBUSY) {
756                 /*
757                  * We failed to swap the buffer due to a commit taking
758                  * place on this CPU. We fail to record, but we reset
759                  * the max trace buffer (no one writes directly to it)
760                  * and flag that it failed.
761                  */
762                 trace_array_printk(&max_tr, _THIS_IP_,
763                         "Failed to swap buffers due to commit in progress\n");
764         }
765
766         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
767
768         __update_max_tr(tr, tsk, cpu);
769         arch_spin_unlock(&ftrace_max_lock);
770 }
771 #endif /* CONFIG_TRACER_MAX_TRACE */
772
773 static void default_wait_pipe(struct trace_iterator *iter)
774 {
775         DEFINE_WAIT(wait);
776
777         prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
778
779         /*
780          * The events can happen in critical sections where
781          * checking a work queue can cause deadlocks.
782          * After adding a task to the queue, this flag is set
783          * only to notify events to try to wake up the queue
784          * using irq_work.
785          *
786          * We don't clear it even if the buffer is no longer
787          * empty. The flag only causes the next event to run
788  * irq_work to do the work queue wake up. The worst
789  * that can happen if we race with !trace_empty() is that
790          * an event will cause an irq_work to try to wake up
791          * an empty queue.
792          *
793          * There's no reason to protect this flag either, as
794          * the work queue and irq_work logic will do the necessary
795          * synchronization for the wake ups. The only thing
796          * that is necessary is that the wake up happens after
797  * a task has been queued. Spurious wake ups are OK.
798          */
799         trace_wakeup_needed = true;
800
801         if (trace_empty(iter))
802                 schedule();
803
804         finish_wait(&trace_wait, &wait);
805 }
806
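/*
 * Example (a rough sketch): the reader/writer handshake above depends on
 * trace_work_wakeup having trace_wake_up() as its callback; the full file
 * wires that up during buffer allocation, roughly like this.  The
 * example_wire_wakeup() name is made up.
 */
#if 0
static __init void example_wire_wakeup(void)
{
        init_irq_work(&trace_work_wakeup, trace_wake_up);
        /* writers then only need irq_work_queue(&trace_work_wakeup) */
}
#endif
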
807 /**
808  * register_tracer - register a tracer with the ftrace system.
809  * @type - the plugin for the tracer
810  *
811  * Register a new plugin tracer.
812  */
813 int register_tracer(struct tracer *type)
814 {
815         struct tracer *t;
816         int ret = 0;
817
818         if (!type->name) {
819                 pr_info("Tracer must have a name\n");
820                 return -1;
821         }
822
823         if (strlen(type->name) >= MAX_TRACER_SIZE) {
824                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
825                 return -1;
826         }
827
828         mutex_lock(&trace_types_lock);
829
830         tracing_selftest_running = true;
831
832         for (t = trace_types; t; t = t->next) {
833                 if (strcmp(type->name, t->name) == 0) {
834                         /* already found */
835                         pr_info("Tracer %s already registered\n",
836                                 type->name);
837                         ret = -1;
838                         goto out;
839                 }
840         }
841
842         if (!type->set_flag)
843                 type->set_flag = &dummy_set_flag;
844         if (!type->flags)
845                 type->flags = &dummy_tracer_flags;
846         else
847                 if (!type->flags->opts)
848                         type->flags->opts = dummy_tracer_opt;
849         if (!type->wait_pipe)
850                 type->wait_pipe = default_wait_pipe;
851
852
853 #ifdef CONFIG_FTRACE_STARTUP_TEST
854         if (type->selftest && !tracing_selftest_disabled) {
855                 struct tracer *saved_tracer = current_trace;
856                 struct trace_array *tr = &global_trace;
857
858                 /*
859                  * Run a selftest on this tracer.
860                  * Here we reset the trace buffer, and set the current
861                  * tracer to be this tracer. The tracer can then run some
862                  * internal tracing to verify that everything is in order.
863                  * If we fail, we do not register this tracer.
864                  */
865                 tracing_reset_online_cpus(tr);
866
867                 current_trace = type;
868
869                 /* If we expanded the buffers, make sure the max is expanded too */
870                 if (ring_buffer_expanded && type->use_max_tr)
871                         ring_buffer_resize(max_tr.buffer, trace_buf_size,
872                                                 RING_BUFFER_ALL_CPUS);
873
874                 /* the test is responsible for initializing and enabling */
875                 pr_info("Testing tracer %s: ", type->name);
876                 ret = type->selftest(type, tr);
877                 /* the test is responsible for resetting too */
878                 current_trace = saved_tracer;
879                 if (ret) {
880                         printk(KERN_CONT "FAILED!\n");
881                         /* Add the warning after printing 'FAILED' */
882                         WARN_ON(1);
883                         goto out;
884                 }
885                 /* Only reset on passing, to avoid touching corrupted buffers */
886                 tracing_reset_online_cpus(tr);
887
888                 /* Shrink the max buffer again */
889                 if (ring_buffer_expanded && type->use_max_tr)
890                         ring_buffer_resize(max_tr.buffer, 1,
891                                                 RING_BUFFER_ALL_CPUS);
892
893                 printk(KERN_CONT "PASSED\n");
894         }
895 #endif
896
897         type->next = trace_types;
898         trace_types = type;
899
900  out:
901         tracing_selftest_running = false;
902         mutex_unlock(&trace_types_lock);
903
904         if (ret || !default_bootup_tracer)
905                 goto out_unlock;
906
907         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
908                 goto out_unlock;
909
910         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
911         /* Do we want this tracer to start on bootup? */
912         tracing_set_tracer(type->name);
913         default_bootup_tracer = NULL;
914         /* disable other selftests, since this will break it. */
915         tracing_selftest_disabled = 1;
916 #ifdef CONFIG_FTRACE_STARTUP_TEST
917         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
918                type->name);
919 #endif
920
921  out_unlock:
922         return ret;
923 }
924
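/*
 * Example (a rough, hypothetical sketch): the smallest possible plugin
 * tracer registration.  The example_* names are made up; real tracers
 * (function, irqsoff, ...) live in the other trace_*.c files.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static int __init init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
device_initcall(init_example_tracer);
#endif
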
925 void tracing_reset(struct trace_array *tr, int cpu)
926 {
927         struct ring_buffer *buffer = tr->buffer;
928
929         if (!buffer)
930                 return;
931
932         ring_buffer_record_disable(buffer);
933
934         /* Make sure all commits have finished */
935         synchronize_sched();
936         ring_buffer_reset_cpu(buffer, cpu);
937
938         ring_buffer_record_enable(buffer);
939 }
940
941 void tracing_reset_online_cpus(struct trace_array *tr)
942 {
943         struct ring_buffer *buffer = tr->buffer;
944         int cpu;
945
946         if (!buffer)
947                 return;
948
949         ring_buffer_record_disable(buffer);
950
951         /* Make sure all commits have finished */
952         synchronize_sched();
953
954         tr->time_start = ftrace_now(tr->cpu);
955
956         for_each_online_cpu(cpu)
957                 ring_buffer_reset_cpu(buffer, cpu);
958
959         ring_buffer_record_enable(buffer);
960 }
961
962 void tracing_reset_current(int cpu)
963 {
964         tracing_reset(&global_trace, cpu);
965 }
966
967 void tracing_reset_current_online_cpus(void)
968 {
969         tracing_reset_online_cpus(&global_trace);
970 }
971
972 #define SAVED_CMDLINES 128
973 #define NO_CMDLINE_MAP UINT_MAX
974 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
975 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
976 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
977 static int cmdline_idx;
978 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
979
980 /* temporarily disable recording */
981 static atomic_t trace_record_cmdline_disabled __read_mostly;
982
983 static void trace_init_cmdlines(void)
984 {
985         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
986         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
987         cmdline_idx = 0;
988 }
989
990 int is_tracing_stopped(void)
991 {
992         return trace_stop_count;
993 }
994
995 /**
996  * ftrace_off_permanent - disable all ftrace code permanently
997  *
998  * This should only be called when a serious anomaly has
999  * been detected.  This will turn off function tracing,
1000  * ring buffers, and other tracing utilities. It takes no
1001  * locks and can be called from any context.
1002  */
1003 void ftrace_off_permanent(void)
1004 {
1005         tracing_disabled = 1;
1006         ftrace_stop();
1007         tracing_off_permanent();
1008 }
1009
1010 /**
1011  * tracing_start - quick start of the tracer
1012  *
1013  * If tracing is enabled but was stopped by tracing_stop,
1014  * this will start the tracer back up.
1015  */
1016 void tracing_start(void)
1017 {
1018         struct ring_buffer *buffer;
1019         unsigned long flags;
1020
1021         if (tracing_disabled)
1022                 return;
1023
1024         raw_spin_lock_irqsave(&tracing_start_lock, flags);
1025         if (--trace_stop_count) {
1026                 if (trace_stop_count < 0) {
1027                         /* Someone screwed up their debugging */
1028                         WARN_ON_ONCE(1);
1029                         trace_stop_count = 0;
1030                 }
1031                 goto out;
1032         }
1033
1034         /* Prevent the buffers from switching */
1035         arch_spin_lock(&ftrace_max_lock);
1036
1037         buffer = global_trace.buffer;
1038         if (buffer)
1039                 ring_buffer_record_enable(buffer);
1040
1041         buffer = max_tr.buffer;
1042         if (buffer)
1043                 ring_buffer_record_enable(buffer);
1044
1045         arch_spin_unlock(&ftrace_max_lock);
1046
1047         ftrace_start();
1048  out:
1049         raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
1050 }
1051
1052 /**
1053  * tracing_stop - quick stop of the tracer
1054  *
1055  * Lightweight way to stop tracing. Use in conjunction with
1056  * tracing_start.
1057  */
1058 void tracing_stop(void)
1059 {
1060         struct ring_buffer *buffer;
1061         unsigned long flags;
1062
1063         ftrace_stop();
1064         raw_spin_lock_irqsave(&tracing_start_lock, flags);
1065         if (trace_stop_count++)
1066                 goto out;
1067
1068         /* Prevent the buffers from switching */
1069         arch_spin_lock(&ftrace_max_lock);
1070
1071         buffer = global_trace.buffer;
1072         if (buffer)
1073                 ring_buffer_record_disable(buffer);
1074
1075         buffer = max_tr.buffer;
1076         if (buffer)
1077                 ring_buffer_record_disable(buffer);
1078
1079         arch_spin_unlock(&ftrace_max_lock);
1080
1081  out:
1082         raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
1083 }
1084
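/*
 * Example (a rough, hypothetical sketch): tracing_stop() and tracing_start()
 * nest via trace_stop_count, so a caller can bracket a noisy region without
 * worrying about other stoppers.  example_noisy_operation() is made up.
 */
#if 0
static void example_quiet_section(void)
{
        tracing_stop();
        example_noisy_operation();
        tracing_start();
}
#endif
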
1085 void trace_stop_cmdline_recording(void);
1086
1087 static void trace_save_cmdline(struct task_struct *tsk)
1088 {
1089         unsigned pid, idx;
1090
1091         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1092                 return;
1093
1094         /*
1095          * It's not the end of the world if we don't get
1096          * the lock, but we also don't want to spin
1097          * nor do we want to disable interrupts,
1098          * so if we miss here, then better luck next time.
1099          */
1100         if (!arch_spin_trylock(&trace_cmdline_lock))
1101                 return;
1102
1103         idx = map_pid_to_cmdline[tsk->pid];
1104         if (idx == NO_CMDLINE_MAP) {
1105                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1106
1107                 /*
1108                  * Check whether the cmdline buffer at idx has a pid
1109                  * mapped. We are going to overwrite that entry so we
1110                  * need to clear the map_pid_to_cmdline. Otherwise we
1111                  * would read the new comm for the old pid.
1112                  */
1113                 pid = map_cmdline_to_pid[idx];
1114                 if (pid != NO_CMDLINE_MAP)
1115                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1116
1117                 map_cmdline_to_pid[idx] = tsk->pid;
1118                 map_pid_to_cmdline[tsk->pid] = idx;
1119
1120                 cmdline_idx = idx;
1121         }
1122
1123         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1124
1125         arch_spin_unlock(&trace_cmdline_lock);
1126 }
1127
1128 void trace_find_cmdline(int pid, char comm[])
1129 {
1130         unsigned map;
1131
1132         if (!pid) {
1133                 strcpy(comm, "<idle>");
1134                 return;
1135         }
1136
1137         if (WARN_ON_ONCE(pid < 0)) {
1138                 strcpy(comm, "<XXX>");
1139                 return;
1140         }
1141
1142         if (pid > PID_MAX_DEFAULT) {
1143                 strcpy(comm, "<...>");
1144                 return;
1145         }
1146
1147         preempt_disable();
1148         arch_spin_lock(&trace_cmdline_lock);
1149         map = map_pid_to_cmdline[pid];
1150         if (map != NO_CMDLINE_MAP)
1151                 strcpy(comm, saved_cmdlines[map]);
1152         else
1153                 strcpy(comm, "<...>");
1154
1155         arch_spin_unlock(&trace_cmdline_lock);
1156         preempt_enable();
1157 }
1158
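/*
 * Example (a rough sketch): output code resolves a pid back to a comm with
 * trace_find_cmdline() when formatting an entry; this mirrors the pattern
 * used by the formatters in trace_output.c.  example_print_comm() is a
 * made-up name.
 */
#if 0
static void example_print_comm(struct trace_seq *s, struct trace_entry *entry)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(entry->pid, comm);
        trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
}
#endif
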
1159 void tracing_record_cmdline(struct task_struct *tsk)
1160 {
1161         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1162                 return;
1163
1164         if (!__this_cpu_read(trace_cmdline_save))
1165                 return;
1166
1167         __this_cpu_write(trace_cmdline_save, false);
1168
1169         trace_save_cmdline(tsk);
1170 }
1171
1172 void
1173 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1174                              int pc)
1175 {
1176         struct task_struct *tsk = current;
1177
1178         entry->preempt_count            = pc & 0xff;
1179         entry->pid                      = (tsk) ? tsk->pid : 0;
1180         entry->flags =
1181 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1182                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1183 #else
1184                 TRACE_FLAG_IRQS_NOSUPPORT |
1185 #endif
1186                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1187                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1188                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1189 }
1190 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1191
1192 struct ring_buffer_event *
1193 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1194                           int type,
1195                           unsigned long len,
1196                           unsigned long flags, int pc)
1197 {
1198         struct ring_buffer_event *event;
1199
1200         event = ring_buffer_lock_reserve(buffer, len);
1201         if (event != NULL) {
1202                 struct trace_entry *ent = ring_buffer_event_data(event);
1203
1204                 tracing_generic_entry_update(ent, flags, pc);
1205                 ent->type = type;
1206         }
1207
1208         return event;
1209 }
1210
1211 void
1212 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1213 {
1214         __this_cpu_write(trace_cmdline_save, true);
1215         if (trace_wakeup_needed) {
1216                 trace_wakeup_needed = false;
1217                 /* irq_work_queue() supplies its own memory barriers */
1218                 irq_work_queue(&trace_work_wakeup);
1219         }
1220         ring_buffer_unlock_commit(buffer, event);
1221 }
1222
1223 static inline void
1224 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1225                              struct ring_buffer_event *event,
1226                              unsigned long flags, int pc)
1227 {
1228         __buffer_unlock_commit(buffer, event);
1229
1230         ftrace_trace_stack(buffer, flags, 6, pc);
1231         ftrace_trace_userstack(buffer, flags, pc);
1232 }
1233
1234 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1235                                 struct ring_buffer_event *event,
1236                                 unsigned long flags, int pc)
1237 {
1238         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1239 }
1240 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1241
1242 struct ring_buffer_event *
1243 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1244                                   int type, unsigned long len,
1245                                   unsigned long flags, int pc)
1246 {
1247         *current_rb = global_trace.buffer;
1248         return trace_buffer_lock_reserve(*current_rb,
1249                                          type, len, flags, pc);
1250 }
1251 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1252
1253 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1254                                         struct ring_buffer_event *event,
1255                                         unsigned long flags, int pc)
1256 {
1257         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1258 }
1259 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1260
1261 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1262                                      struct ring_buffer_event *event,
1263                                      unsigned long flags, int pc,
1264                                      struct pt_regs *regs)
1265 {
1266         __buffer_unlock_commit(buffer, event);
1267
1268         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1269         ftrace_trace_userstack(buffer, flags, pc);
1270 }
1271 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1272
1273 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1274                                          struct ring_buffer_event *event)
1275 {
1276         ring_buffer_discard_commit(buffer, event);
1277 }
1278 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1279
1280 void
1281 trace_function(struct trace_array *tr,
1282                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1283                int pc)
1284 {
1285         struct ftrace_event_call *call = &event_function;
1286         struct ring_buffer *buffer = tr->buffer;
1287         struct ring_buffer_event *event;
1288         struct ftrace_entry *entry;
1289
1290         /* If we are reading the ring buffer, don't trace */
1291         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1292                 return;
1293
1294         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1295                                           flags, pc);
1296         if (!event)
1297                 return;
1298         entry   = ring_buffer_event_data(event);
1299         entry->ip                       = ip;
1300         entry->parent_ip                = parent_ip;
1301
1302         if (!filter_check_discard(call, entry, buffer, event))
1303                 __buffer_unlock_commit(buffer, event);
1304 }
1305
1306 void
1307 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1308        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1309        int pc)
1310 {
1311         if (likely(!atomic_read(&data->disabled)))
1312                 trace_function(tr, ip, parent_ip, flags, pc);
1313 }
1314
1315 #ifdef CONFIG_STACKTRACE
1316
1317 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1318 struct ftrace_stack {
1319         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1320 };
1321
1322 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1323 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1324
1325 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1326                                  unsigned long flags,
1327                                  int skip, int pc, struct pt_regs *regs)
1328 {
1329         struct ftrace_event_call *call = &event_kernel_stack;
1330         struct ring_buffer_event *event;
1331         struct stack_entry *entry;
1332         struct stack_trace trace;
1333         int use_stack;
1334         int size = FTRACE_STACK_ENTRIES;
1335
1336         trace.nr_entries        = 0;
1337         trace.skip              = skip;
1338
1339         /*
1340          * Since events can happen in NMIs there's no safe way to
1341          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1342          * or NMI comes in, it will just have to use the default
1343          * FTRACE_STACK_SIZE.
1344          */
1345         preempt_disable_notrace();
1346
1347         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1348         /*
1349          * We don't need any atomic variables, just a barrier.
1350          * If an interrupt comes in, we don't care, because it would
1351          * have exited and put the counter back to what we want.
1352          * We just need a barrier to keep gcc from moving things
1353          * around.
1354          */
1355         barrier();
1356         if (use_stack == 1) {
1357                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1358                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1359
1360                 if (regs)
1361                         save_stack_trace_regs(regs, &trace);
1362                 else
1363                         save_stack_trace(&trace);
1364
1365                 if (trace.nr_entries > size)
1366                         size = trace.nr_entries;
1367         } else
1368                 /* From now on, use_stack is a boolean */
1369                 use_stack = 0;
1370
1371         size *= sizeof(unsigned long);
1372
1373         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1374                                           sizeof(*entry) + size, flags, pc);
1375         if (!event)
1376                 goto out;
1377         entry = ring_buffer_event_data(event);
1378
1379         memset(&entry->caller, 0, size);
1380
1381         if (use_stack)
1382                 memcpy(&entry->caller, trace.entries,
1383                        trace.nr_entries * sizeof(unsigned long));
1384         else {
1385                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1386                 trace.entries           = entry->caller;
1387                 if (regs)
1388                         save_stack_trace_regs(regs, &trace);
1389                 else
1390                         save_stack_trace(&trace);
1391         }
1392
1393         entry->size = trace.nr_entries;
1394
1395         if (!filter_check_discard(call, entry, buffer, event))
1396                 __buffer_unlock_commit(buffer, event);
1397
1398  out:
1399         /* Again, don't let gcc optimize things here */
1400         barrier();
1401         __this_cpu_dec(ftrace_stack_reserve);
1402         preempt_enable_notrace();
1403
1404 }
1405
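/*
 * Example (a rough, hypothetical sketch): the reservation trick above is a
 * general pattern for claiming a per-cpu scratch area without atomics: bump
 * a per-cpu depth counter with __this_cpu_inc_return(), use the area only
 * at depth 1, and always drop the counter on the way out.  The example_*
 * names are made up.
 */
#if 0
struct example_scratch {
        char buf[64];
};
static DEFINE_PER_CPU(struct example_scratch, example_scratch);
static DEFINE_PER_CPU(int, example_reserve);

static void example_use_scratch(void)
{
        preempt_disable_notrace();
        if (__this_cpu_inc_return(example_reserve) == 1) {
                char *buf = __get_cpu_var(example_scratch).buf;

                buf[0] = '\0';  /* the area is exclusively ours here */
        }
        barrier();
        __this_cpu_dec(example_reserve);
        preempt_enable_notrace();
}
#endif
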
1406 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1407                              int skip, int pc, struct pt_regs *regs)
1408 {
1409         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1410                 return;
1411
1412         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1413 }
1414
1415 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1416                         int skip, int pc)
1417 {
1418         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1419                 return;
1420
1421         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1422 }
1423
1424 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1425                    int pc)
1426 {
1427         __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
1428 }
1429
1430 /**
1431  * trace_dump_stack - record a stack back trace in the trace buffer
1432  */
1433 void trace_dump_stack(void)
1434 {
1435         unsigned long flags;
1436
1437         if (tracing_disabled || tracing_selftest_running)
1438                 return;
1439
1440         local_save_flags(flags);
1441
1442         /* skipping 3 traces seems to get us to the caller of this function */
1443         __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
1444 }
1445
1446 static DEFINE_PER_CPU(int, user_stack_count);
1447
1448 void
1449 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1450 {
1451         struct ftrace_event_call *call = &event_user_stack;
1452         struct ring_buffer_event *event;
1453         struct userstack_entry *entry;
1454         struct stack_trace trace;
1455
1456         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1457                 return;
1458
1459         /*
1460  * NMIs can not handle page faults, even with fixups.
1461  * Saving the user stack can (and often does) fault.
1462          */
1463         if (unlikely(in_nmi()))
1464                 return;
1465
1466         /*
1467          * prevent recursion, since the user stack tracing may
1468          * trigger other kernel events.
1469          */
1470         preempt_disable();
1471         if (__this_cpu_read(user_stack_count))
1472                 goto out;
1473
1474         __this_cpu_inc(user_stack_count);
1475
1476         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1477                                           sizeof(*entry), flags, pc);
1478         if (!event)
1479                 goto out_drop_count;
1480         entry   = ring_buffer_event_data(event);
1481
1482         entry->tgid             = current->tgid;
1483         memset(&entry->caller, 0, sizeof(entry->caller));
1484
1485         trace.nr_entries        = 0;
1486         trace.max_entries       = FTRACE_STACK_ENTRIES;
1487         trace.skip              = 0;
1488         trace.entries           = entry->caller;
1489
1490         save_stack_trace_user(&trace);
1491         if (!filter_check_discard(call, entry, buffer, event))
1492                 __buffer_unlock_commit(buffer, event);
1493
1494  out_drop_count:
1495         __this_cpu_dec(user_stack_count);
1496  out:
1497         preempt_enable();
1498 }
1499
1500 #ifdef UNUSED
1501 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1502 {
1503         ftrace_trace_userstack(tr, flags, preempt_count());
1504 }
1505 #endif /* UNUSED */
1506
1507 #endif /* CONFIG_STACKTRACE */
1508
1509 /* created for use with alloc_percpu */
1510 struct trace_buffer_struct {
1511         char buffer[TRACE_BUF_SIZE];
1512 };
1513
1514 static struct trace_buffer_struct *trace_percpu_buffer;
1515 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1516 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1517 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1518
1519 /*
1520  * The buffer used is dependent on the context. There is a per cpu
1521  * buffer for normal context, softirq context, hard irq context and
1522  * for NMI context. This allows for lockless recording.
1523  *
1524  * Note, if the buffers failed to be allocated, then this returns NULL
1525  */
1526 static char *get_trace_buf(void)
1527 {
1528         struct trace_buffer_struct *percpu_buffer;
1529
1530         /*
1531          * If we have allocated per cpu buffers, then we do not
1532          * need to do any locking.
1533          */
1534         if (in_nmi())
1535                 percpu_buffer = trace_percpu_nmi_buffer;
1536         else if (in_irq())
1537                 percpu_buffer = trace_percpu_irq_buffer;
1538         else if (in_softirq())
1539                 percpu_buffer = trace_percpu_sirq_buffer;
1540         else
1541                 percpu_buffer = trace_percpu_buffer;
1542
1543         if (!percpu_buffer)
1544                 return NULL;
1545
1546         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1547 }
1548
1549 static int alloc_percpu_trace_buffer(void)
1550 {
1551         struct trace_buffer_struct *buffers;
1552         struct trace_buffer_struct *sirq_buffers;
1553         struct trace_buffer_struct *irq_buffers;
1554         struct trace_buffer_struct *nmi_buffers;
1555
1556         buffers = alloc_percpu(struct trace_buffer_struct);
1557         if (!buffers)
1558                 goto err_warn;
1559
1560         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1561         if (!sirq_buffers)
1562                 goto err_sirq;
1563
1564         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1565         if (!irq_buffers)
1566                 goto err_irq;
1567
1568         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1569         if (!nmi_buffers)
1570                 goto err_nmi;
1571
1572         trace_percpu_buffer = buffers;
1573         trace_percpu_sirq_buffer = sirq_buffers;
1574         trace_percpu_irq_buffer = irq_buffers;
1575         trace_percpu_nmi_buffer = nmi_buffers;
1576
1577         return 0;
1578
1579  err_nmi:
1580         free_percpu(irq_buffers);
1581  err_irq:
1582         free_percpu(sirq_buffers);
1583  err_sirq:
1584         free_percpu(buffers);
1585  err_warn:
1586         WARN(1, "Could not allocate percpu trace_printk buffer");
1587         return -ENOMEM;
1588 }
1589
1590 static int buffers_allocated;
1591
1592 void trace_printk_init_buffers(void)
1593 {
1594         if (buffers_allocated)
1595                 return;
1596
1597         if (alloc_percpu_trace_buffer())
1598                 return;
1599
1600         pr_info("ftrace: Allocated trace_printk buffers\n");
1601
1602         /* Expand the buffers to set size */
1603         tracing_update_buffers();
1604
1605         buffers_allocated = 1;
1606
1607         /*
1608          * trace_printk_init_buffers() can be called by modules.
1609          * If that happens, then we need to start cmdline recording
1610          * directly here. If the global_trace.buffer is already
1611          * allocated here, then this was called by module code.
1612          */
1613         if (global_trace.buffer)
1614                 tracing_start_cmdline_record();
1615 }
1616
1617 void trace_printk_start_comm(void)
1618 {
1619         /* Start tracing comms if trace printk is set */
1620         if (!buffers_allocated)
1621                 return;
1622         tracing_start_cmdline_record();
1623 }
1624
1625 static void trace_printk_start_stop_comm(int enabled)
1626 {
1627         if (!buffers_allocated)
1628                 return;
1629
1630         if (enabled)
1631                 tracing_start_cmdline_record();
1632         else
1633                 tracing_stop_cmdline_record();
1634 }
1635
1636 /**
1637  * trace_vbprintk - write binary msg to tracing buffer
1638  *
1639  */
1640 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1641 {
1642         struct ftrace_event_call *call = &event_bprint;
1643         struct ring_buffer_event *event;
1644         struct ring_buffer *buffer;
1645         struct trace_array *tr = &global_trace;
1646         struct bprint_entry *entry;
1647         unsigned long flags;
1648         char *tbuffer;
1649         int len = 0, size, pc;
1650
1651         if (unlikely(tracing_selftest_running || tracing_disabled))
1652                 return 0;
1653
1654         /* Don't pollute graph traces with trace_vprintk internals */
1655         pause_graph_tracing();
1656
1657         pc = preempt_count();
1658         preempt_disable_notrace();
1659
1660         tbuffer = get_trace_buf();
1661         if (!tbuffer) {
1662                 len = 0;
1663                 goto out;
1664         }
1665
1666         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
1667
1668         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
1669                 goto out;
1670
1671         local_save_flags(flags);
1672         size = sizeof(*entry) + sizeof(u32) * len;
1673         buffer = tr->buffer;
1674         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1675                                           flags, pc);
1676         if (!event)
1677                 goto out;
1678         entry = ring_buffer_event_data(event);
1679         entry->ip                       = ip;
1680         entry->fmt                      = fmt;
1681
1682         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
1683         if (!filter_check_discard(call, entry, buffer, event)) {
1684                 __buffer_unlock_commit(buffer, event);
1685                 ftrace_trace_stack(buffer, flags, 6, pc);
1686         }
1687
1688 out:
1689         preempt_enable_notrace();
1690         unpause_graph_tracing();
1691
1692         return len;
1693 }
1694 EXPORT_SYMBOL_GPL(trace_vbprintk);
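
/*
 * For illustration only (a sketch, not part of this file): trace_vbprintk()
 * is normally reached through the trace_printk() macro when it is given a
 * constant format string; a format built at runtime falls back to the
 * plain-text trace_vprintk() path below. A minimal module exercising this
 * might look like the following (hypothetical module and function names,
 * made up for the example):
 *
 *	#include <linux/module.h>
 *	#include <linux/kernel.h>
 *
 *	static int __init tp_example_init(void)
 *	{
 *		trace_printk("example module loaded, HZ=%d\n", HZ);
 *		return 0;
 *	}
 *
 *	static void __exit tp_example_exit(void)
 *	{
 *		trace_printk("example module unloading\n");
 *	}
 *
 *	module_init(tp_example_init);
 *	module_exit(tp_example_exit);
 *	MODULE_LICENSE("GPL");
 */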
1695
1696 int trace_array_printk(struct trace_array *tr,
1697                        unsigned long ip, const char *fmt, ...)
1698 {
1699         int ret;
1700         va_list ap;
1701
1702         if (!(trace_flags & TRACE_ITER_PRINTK))
1703                 return 0;
1704
1705         va_start(ap, fmt);
1706         ret = trace_array_vprintk(tr, ip, fmt, ap);
1707         va_end(ap);
1708         return ret;
1709 }
1710
1711 int trace_array_vprintk(struct trace_array *tr,
1712                         unsigned long ip, const char *fmt, va_list args)
1713 {
1714         struct ftrace_event_call *call = &event_print;
1715         struct ring_buffer_event *event;
1716         struct ring_buffer *buffer;
1717         int len = 0, size, pc;
1718         struct print_entry *entry;
1719         unsigned long flags;
1720         char *tbuffer;
1721
1722         if (tracing_disabled || tracing_selftest_running)
1723                 return 0;
1724
1725         /* Don't pollute graph traces with trace_vprintk internals */
1726         pause_graph_tracing();
1727
1728         pc = preempt_count();
1729         preempt_disable_notrace();
1730
1731
1732         tbuffer = get_trace_buf();
1733         if (!tbuffer) {
1734                 len = 0;
1735                 goto out;
1736         }
1737
1738         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
1739         if (len > TRACE_BUF_SIZE)
1740                 goto out;
1741
1742         local_save_flags(flags);
1743         size = sizeof(*entry) + len + 1;
1744         buffer = tr->buffer;
1745         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1746                                           flags, pc);
1747         if (!event)
1748                 goto out;
1749         entry = ring_buffer_event_data(event);
1750         entry->ip = ip;
1751
1752         memcpy(&entry->buf, tbuffer, len);
1753         entry->buf[len] = '\0';
1754         if (!filter_check_discard(call, entry, buffer, event)) {
1755                 __buffer_unlock_commit(buffer, event);
1756                 ftrace_trace_stack(buffer, flags, 6, pc);
1757         }
1758  out:
1759         preempt_enable_notrace();
1760         unpause_graph_tracing();
1761
1762         return len;
1763 }
1764
1765 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1766 {
1767         return trace_array_vprintk(&global_trace, ip, fmt, args);
1768 }
1769 EXPORT_SYMBOL_GPL(trace_vprintk);
1770
1771 static void trace_iterator_increment(struct trace_iterator *iter)
1772 {
1773         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
1774
1775         iter->idx++;
1776         if (buf_iter)
1777                 ring_buffer_read(buf_iter, NULL);
1778 }
1779
1780 static struct trace_entry *
1781 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1782                 unsigned long *lost_events)
1783 {
1784         struct ring_buffer_event *event;
1785         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
1786
1787         if (buf_iter)
1788                 event = ring_buffer_iter_peek(buf_iter, ts);
1789         else
1790                 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1791                                          lost_events);
1792
1793         if (event) {
1794                 iter->ent_size = ring_buffer_event_length(event);
1795                 return ring_buffer_event_data(event);
1796         }
1797         iter->ent_size = 0;
1798         return NULL;
1799 }
1800
1801 static struct trace_entry *
1802 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1803                   unsigned long *missing_events, u64 *ent_ts)
1804 {
1805         struct ring_buffer *buffer = iter->tr->buffer;
1806         struct trace_entry *ent, *next = NULL;
1807         unsigned long lost_events = 0, next_lost = 0;
1808         int cpu_file = iter->cpu_file;
1809         u64 next_ts = 0, ts;
1810         int next_cpu = -1;
1811         int next_size = 0;
1812         int cpu;
1813
1814         /*
1815          * If we are in a per_cpu trace file, don't bother iterating over
1816          * all the CPUs; just peek at the requested cpu directly.
1817          */
1818         if (cpu_file > TRACE_PIPE_ALL_CPU) {
1819                 if (ring_buffer_empty_cpu(buffer, cpu_file))
1820                         return NULL;
1821                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1822                 if (ent_cpu)
1823                         *ent_cpu = cpu_file;
1824
1825                 return ent;
1826         }
1827
1828         for_each_tracing_cpu(cpu) {
1829
1830                 if (ring_buffer_empty_cpu(buffer, cpu))
1831                         continue;
1832
1833                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1834
1835                 /*
1836                  * Pick the entry with the smallest timestamp:
1837                  */
1838                 if (ent && (!next || ts < next_ts)) {
1839                         next = ent;
1840                         next_cpu = cpu;
1841                         next_ts = ts;
1842                         next_lost = lost_events;
1843                         next_size = iter->ent_size;
1844                 }
1845         }
1846
1847         iter->ent_size = next_size;
1848
1849         if (ent_cpu)
1850                 *ent_cpu = next_cpu;
1851
1852         if (ent_ts)
1853                 *ent_ts = next_ts;
1854
1855         if (missing_events)
1856                 *missing_events = next_lost;
1857
1858         return next;
1859 }
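
/*
 * For illustration only (simplified sketch, not part of this file; the
 * struct and function names are made up for the example): the loop above
 * is a plain "pick the smallest timestamp" merge across the per-cpu
 * streams. Reduced to its essentials it behaves like:
 *
 *	struct sample { u64 ts; int valid; };
 *
 *	static int pick_earliest(const struct sample *s, int nr)
 *	{
 *		u64 best_ts = 0;
 *		int best = -1, i;
 *
 *		for (i = 0; i < nr; i++) {
 *			if (!s[i].valid)
 *				continue;
 *			if (best < 0 || s[i].ts < best_ts) {
 *				best = i;
 *				best_ts = s[i].ts;
 *			}
 *		}
 *		return best;
 *	}
 *
 * A return value of -1 means every stream was empty, matching the NULL
 * return from __find_next_entry() when no cpu has a pending entry.
 */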
1860
1861 /* Find the next real entry, without updating the iterator itself */
1862 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1863                                           int *ent_cpu, u64 *ent_ts)
1864 {
1865         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1866 }
1867
1868 /* Find the next real entry, and increment the iterator to the next entry */
1869 void *trace_find_next_entry_inc(struct trace_iterator *iter)
1870 {
1871         iter->ent = __find_next_entry(iter, &iter->cpu,
1872                                       &iter->lost_events, &iter->ts);
1873
1874         if (iter->ent)
1875                 trace_iterator_increment(iter);
1876
1877         return iter->ent ? iter : NULL;
1878 }
1879
1880 static void trace_consume(struct trace_iterator *iter)
1881 {
1882         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1883                             &iter->lost_events);
1884 }
1885
1886 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1887 {
1888         struct trace_iterator *iter = m->private;
1889         int i = (int)*pos;
1890         void *ent;
1891
1892         WARN_ON_ONCE(iter->leftover);
1893
1894         (*pos)++;
1895
1896         /* can't go backwards */
1897         if (iter->idx > i)
1898                 return NULL;
1899
1900         if (iter->idx < 0)
1901                 ent = trace_find_next_entry_inc(iter);
1902         else
1903                 ent = iter;
1904
1905         while (ent && iter->idx < i)
1906                 ent = trace_find_next_entry_inc(iter);
1907
1908         iter->pos = *pos;
1909
1910         return ent;
1911 }
1912
1913 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1914 {
1915         struct trace_array *tr = iter->tr;
1916         struct ring_buffer_event *event;
1917         struct ring_buffer_iter *buf_iter;
1918         unsigned long entries = 0;
1919         u64 ts;
1920
1921         tr->data[cpu]->skipped_entries = 0;
1922
1923         buf_iter = trace_buffer_iter(iter, cpu);
1924         if (!buf_iter)
1925                 return;
1926
1927         ring_buffer_iter_reset(buf_iter);
1928
1929         /*
1930          * With the max latency tracers, a reset may never have taken
1931          * place on a cpu. This is evident when an entry's timestamp is
1932          * earlier than the start of the trace (tr->time_start).
1933          */
1934         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1935                 if (ts >= iter->tr->time_start)
1936                         break;
1937                 entries++;
1938                 ring_buffer_read(buf_iter, NULL);
1939         }
1940
1941         tr->data[cpu]->skipped_entries = entries;
1942 }
1943
1944 /*
1945  * The current tracer is copied to avoid holding a global lock
1946  * all around.
1947  */
1948 static void *s_start(struct seq_file *m, loff_t *pos)
1949 {
1950         struct trace_iterator *iter = m->private;
1951         static struct tracer *old_tracer;
1952         int cpu_file = iter->cpu_file;
1953         void *p = NULL;
1954         loff_t l = 0;
1955         int cpu;
1956
1957         /* copy the tracer to avoid using a global lock all around */
1958         mutex_lock(&trace_types_lock);
1959         if (unlikely(old_tracer != current_trace && current_trace)) {
1960                 old_tracer = current_trace;
1961                 *iter->trace = *current_trace;
1962         }
1963         mutex_unlock(&trace_types_lock);
1964
1965         atomic_inc(&trace_record_cmdline_disabled);
1966
1967         if (*pos != iter->pos) {
1968                 iter->ent = NULL;
1969                 iter->cpu = 0;
1970                 iter->idx = -1;
1971
1972                 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1973                         for_each_tracing_cpu(cpu)
1974                                 tracing_iter_reset(iter, cpu);
1975                 } else
1976                         tracing_iter_reset(iter, cpu_file);
1977
1978                 iter->leftover = 0;
1979                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1980                         ;
1981
1982         } else {
1983                 /*
1984                  * If we overflowed the seq_file before, then we want
1985                  * to just reuse the trace_seq buffer again.
1986                  */
1987                 if (iter->leftover)
1988                         p = iter;
1989                 else {
1990                         l = *pos - 1;
1991                         p = s_next(m, p, &l);
1992                 }
1993         }
1994
1995         trace_event_read_lock();
1996         trace_access_lock(cpu_file);
1997         return p;
1998 }
1999
2000 static void s_stop(struct seq_file *m, void *p)
2001 {
2002         struct trace_iterator *iter = m->private;
2003
2004         atomic_dec(&trace_record_cmdline_disabled);
2005         trace_access_unlock(iter->cpu_file);
2006         trace_event_read_unlock();
2007 }
2008
2009 static void
2010 get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
2011 {
2012         unsigned long count;
2013         int cpu;
2014
2015         *total = 0;
2016         *entries = 0;
2017
2018         for_each_tracing_cpu(cpu) {
2019                 count = ring_buffer_entries_cpu(tr->buffer, cpu);
2020                 /*
2021                  * If this buffer has skipped entries, then we hold all
2022                  * entries for the trace, and we need to ignore the
2023                  * ones before the trace's start time stamp.
2024                  */
2025                 if (tr->data[cpu]->skipped_entries) {
2026                         count -= tr->data[cpu]->skipped_entries;
2027                         /* total is the same as the entries */
2028                         *total += count;
2029                 } else
2030                         *total += count +
2031                                 ring_buffer_overrun_cpu(tr->buffer, cpu);
2032                 *entries += count;
2033         }
2034 }
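
/*
 * A worked example of the accounting above (numbers invented for
 * illustration): if a cpu's buffer currently holds 100 readable entries
 * and 20 older entries were lost to overwrite (the overrun count), that
 * cpu contributes entries += 100 and total += 120.  If the same buffer
 * instead has 30 skipped_entries recorded by tracing_iter_reset(), it
 * contributes entries += 70 and total += 70, since the skipped entries
 * are still present in the buffer and must not be counted again.
 */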
2035
2036 static void print_lat_help_header(struct seq_file *m)
2037 {
2038         seq_puts(m, "#                  _------=> CPU#            \n");
2039         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2040         seq_puts(m, "#                | / _----=> need-resched    \n");
2041         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2042         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2043         seq_puts(m, "#                |||| /     delay             \n");
2044         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2045         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2046 }
2047
2048 static void print_event_info(struct trace_array *tr, struct seq_file *m)
2049 {
2050         unsigned long total;
2051         unsigned long entries;
2052
2053         get_total_entries(tr, &total, &entries);
2054         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2055                    entries, total, num_online_cpus());
2056         seq_puts(m, "#\n");
2057 }
2058
2059 static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
2060 {
2061         print_event_info(tr, m);
2062         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2063         seq_puts(m, "#              | |       |          |         |\n");
2064 }
2065
2066 static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
2067 {
2068         print_event_info(tr, m);
2069         seq_puts(m, "#                              _-----=> irqs-off\n");
2070         seq_puts(m, "#                             / _----=> need-resched\n");
2071         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2072         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2073         seq_puts(m, "#                            ||| /     delay\n");
2074         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2075         seq_puts(m, "#              | |       |   ||||       |         |\n");
2076 }
2077
2078 void
2079 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2080 {
2081         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2082         struct trace_array *tr = iter->tr;
2083         struct trace_array_cpu *data = tr->data[tr->cpu];
2084         struct tracer *type = current_trace;
2085         unsigned long entries;
2086         unsigned long total;
2087         const char *name = "preemption";
2088
2089         if (type)
2090                 name = type->name;
2091
2092         get_total_entries(tr, &total, &entries);
2093
2094         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2095                    name, UTS_RELEASE);
2096         seq_puts(m, "# -----------------------------------"
2097                  "---------------------------------\n");
2098         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2099                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2100                    nsecs_to_usecs(data->saved_latency),
2101                    entries,
2102                    total,
2103                    tr->cpu,
2104 #if defined(CONFIG_PREEMPT_NONE)
2105                    "server",
2106 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2107                    "desktop",
2108 #elif defined(CONFIG_PREEMPT)
2109                    "preempt",
2110 #else
2111                    "unknown",
2112 #endif
2113                    /* These are reserved for later use */
2114                    0, 0, 0, 0);
2115 #ifdef CONFIG_SMP
2116         seq_printf(m, " #P:%d)\n", num_online_cpus());
2117 #else
2118         seq_puts(m, ")\n");
2119 #endif
2120         seq_puts(m, "#    -----------------\n");
2121         seq_printf(m, "#    | task: %.16s-%d "
2122                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2123                    data->comm, data->pid,
2124                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2125                    data->policy, data->rt_priority);
2126         seq_puts(m, "#    -----------------\n");
2127
2128         if (data->critical_start) {
2129                 seq_puts(m, "#  => started at: ");
2130                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2131                 trace_print_seq(m, &iter->seq);
2132                 seq_puts(m, "\n#  => ended at:   ");
2133                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2134                 trace_print_seq(m, &iter->seq);
2135                 seq_puts(m, "\n#\n");
2136         }
2137
2138         seq_puts(m, "#\n");
2139 }
2140
2141 static void test_cpu_buff_start(struct trace_iterator *iter)
2142 {
2143         struct trace_seq *s = &iter->seq;
2144
2145         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2146                 return;
2147
2148         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2149                 return;
2150
2151         if (cpumask_test_cpu(iter->cpu, iter->started))
2152                 return;
2153
2154         if (iter->tr->data[iter->cpu]->skipped_entries)
2155                 return;
2156
2157         cpumask_set_cpu(iter->cpu, iter->started);
2158
2159         /* Don't print started cpu buffer for the first entry of the trace */
2160         if (iter->idx > 1)
2161                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2162                                 iter->cpu);
2163 }
2164
2165 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2166 {
2167         struct trace_seq *s = &iter->seq;
2168         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2169         struct trace_entry *entry;
2170         struct trace_event *event;
2171
2172         entry = iter->ent;
2173
2174         test_cpu_buff_start(iter);
2175
2176         event = ftrace_find_event(entry->type);
2177
2178         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2179                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2180                         if (!trace_print_lat_context(iter))
2181                                 goto partial;
2182                 } else {
2183                         if (!trace_print_context(iter))
2184                                 goto partial;
2185                 }
2186         }
2187
2188         if (event)
2189                 return event->funcs->trace(iter, sym_flags, event);
2190
2191         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2192                 goto partial;
2193
2194         return TRACE_TYPE_HANDLED;
2195 partial:
2196         return TRACE_TYPE_PARTIAL_LINE;
2197 }
2198
2199 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2200 {
2201         struct trace_seq *s = &iter->seq;
2202         struct trace_entry *entry;
2203         struct trace_event *event;
2204
2205         entry = iter->ent;
2206
2207         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2208                 if (!trace_seq_printf(s, "%d %d %llu ",
2209                                       entry->pid, iter->cpu, iter->ts))
2210                         goto partial;
2211         }
2212
2213         event = ftrace_find_event(entry->type);
2214         if (event)
2215                 return event->funcs->raw(iter, 0, event);
2216
2217         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2218                 goto partial;
2219
2220         return TRACE_TYPE_HANDLED;
2221 partial:
2222         return TRACE_TYPE_PARTIAL_LINE;
2223 }
2224
2225 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2226 {
2227         struct trace_seq *s = &iter->seq;
2228         unsigned char newline = '\n';
2229         struct trace_entry *entry;
2230         struct trace_event *event;
2231
2232         entry = iter->ent;
2233
2234         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2235                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2236                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2237                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2238         }
2239
2240         event = ftrace_find_event(entry->type);
2241         if (event) {
2242                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2243                 if (ret != TRACE_TYPE_HANDLED)
2244                         return ret;
2245         }
2246
2247         SEQ_PUT_FIELD_RET(s, newline);
2248
2249         return TRACE_TYPE_HANDLED;
2250 }
2251
2252 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2253 {
2254         struct trace_seq *s = &iter->seq;
2255         struct trace_entry *entry;
2256         struct trace_event *event;
2257
2258         entry = iter->ent;
2259
2260         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2261                 SEQ_PUT_FIELD_RET(s, entry->pid);
2262                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2263                 SEQ_PUT_FIELD_RET(s, iter->ts);
2264         }
2265
2266         event = ftrace_find_event(entry->type);
2267         return event ? event->funcs->binary(iter, 0, event) :
2268                 TRACE_TYPE_HANDLED;
2269 }
2270
2271 int trace_empty(struct trace_iterator *iter)
2272 {
2273         struct ring_buffer_iter *buf_iter;
2274         int cpu;
2275
2276         /* If we are looking at one CPU buffer, only check that one */
2277         if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
2278                 cpu = iter->cpu_file;
2279                 buf_iter = trace_buffer_iter(iter, cpu);
2280                 if (buf_iter) {
2281                         if (!ring_buffer_iter_empty(buf_iter))
2282                                 return 0;
2283                 } else {
2284                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
2285                                 return 0;
2286                 }
2287                 return 1;
2288         }
2289
2290         for_each_tracing_cpu(cpu) {
2291                 buf_iter = trace_buffer_iter(iter, cpu);
2292                 if (buf_iter) {
2293                         if (!ring_buffer_iter_empty(buf_iter))
2294                                 return 0;
2295                 } else {
2296                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
2297                                 return 0;
2298                 }
2299         }
2300
2301         return 1;
2302 }
2303
2304 /*  Called with trace_event_read_lock() held. */
2305 enum print_line_t print_trace_line(struct trace_iterator *iter)
2306 {
2307         enum print_line_t ret;
2308
2309         if (iter->lost_events &&
2310             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2311                                  iter->cpu, iter->lost_events))
2312                 return TRACE_TYPE_PARTIAL_LINE;
2313
2314         if (iter->trace && iter->trace->print_line) {
2315                 ret = iter->trace->print_line(iter);
2316                 if (ret != TRACE_TYPE_UNHANDLED)
2317                         return ret;
2318         }
2319
2320         if (iter->ent->type == TRACE_BPRINT &&
2321                         trace_flags & TRACE_ITER_PRINTK &&
2322                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2323                 return trace_print_bprintk_msg_only(iter);
2324
2325         if (iter->ent->type == TRACE_PRINT &&
2326                         trace_flags & TRACE_ITER_PRINTK &&
2327                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2328                 return trace_print_printk_msg_only(iter);
2329
2330         if (trace_flags & TRACE_ITER_BIN)
2331                 return print_bin_fmt(iter);
2332
2333         if (trace_flags & TRACE_ITER_HEX)
2334                 return print_hex_fmt(iter);
2335
2336         if (trace_flags & TRACE_ITER_RAW)
2337                 return print_raw_fmt(iter);
2338
2339         return print_trace_fmt(iter);
2340 }
2341
2342 void trace_latency_header(struct seq_file *m)
2343 {
2344         struct trace_iterator *iter = m->private;
2345
2346         /* print nothing if the buffers are empty */
2347         if (trace_empty(iter))
2348                 return;
2349
2350         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2351                 print_trace_header(m, iter);
2352
2353         if (!(trace_flags & TRACE_ITER_VERBOSE))
2354                 print_lat_help_header(m);
2355 }
2356
2357 void trace_default_header(struct seq_file *m)
2358 {
2359         struct trace_iterator *iter = m->private;
2360
2361         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2362                 return;
2363
2364         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2365                 /* print nothing if the buffers are empty */
2366                 if (trace_empty(iter))
2367                         return;
2368                 print_trace_header(m, iter);
2369                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2370                         print_lat_help_header(m);
2371         } else {
2372                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2373                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2374                                 print_func_help_header_irq(iter->tr, m);
2375                         else
2376                                 print_func_help_header(iter->tr, m);
2377                 }
2378         }
2379 }
2380
2381 static void test_ftrace_alive(struct seq_file *m)
2382 {
2383         if (!ftrace_is_dead())
2384                 return;
2385         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2386         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2387 }
2388
2389 static int s_show(struct seq_file *m, void *v)
2390 {
2391         struct trace_iterator *iter = v;
2392         int ret;
2393
2394         if (iter->ent == NULL) {
2395                 if (iter->tr) {
2396                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2397                         seq_puts(m, "#\n");
2398                         test_ftrace_alive(m);
2399                 }
2400                 if (iter->trace && iter->trace->print_header)
2401                         iter->trace->print_header(m);
2402                 else
2403                         trace_default_header(m);
2404
2405         } else if (iter->leftover) {
2406                 /*
2407                  * If we filled the seq_file buffer earlier, we
2408                  * want to just show it now.
2409                  */
2410                 ret = trace_print_seq(m, &iter->seq);
2411
2412                 /* ret should this time be zero, but you never know */
2413                 iter->leftover = ret;
2414
2415         } else {
2416                 print_trace_line(iter);
2417                 ret = trace_print_seq(m, &iter->seq);
2418                 /*
2419                  * If we overflow the seq_file buffer, then it will
2420                  * ask us for this data again at start up.
2421                  * Use that instead.
2422                  *  ret is 0 if seq_file write succeeded.
2423                  *        -1 otherwise.
2424                  */
2425                 iter->leftover = ret;
2426         }
2427
2428         return 0;
2429 }
2430
2431 static const struct seq_operations tracer_seq_ops = {
2432         .start          = s_start,
2433         .next           = s_next,
2434         .stop           = s_stop,
2435         .show           = s_show,
2436 };
2437
2438 static struct trace_iterator *
2439 __tracing_open(struct inode *inode, struct file *file)
2440 {
2441         long cpu_file = (long) inode->i_private;
2442         struct trace_iterator *iter;
2443         int cpu;
2444
2445         if (tracing_disabled)
2446                 return ERR_PTR(-ENODEV);
2447
2448         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2449         if (!iter)
2450                 return ERR_PTR(-ENOMEM);
2451
2452         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2453                                     GFP_KERNEL);
2454         if (!iter->buffer_iter)
2455                 goto release;
2456
2457         /*
2458          * We make a copy of the current tracer to avoid concurrent
2459          * changes on it while we are reading.
2460          */
2461         mutex_lock(&trace_types_lock);
2462         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2463         if (!iter->trace)
2464                 goto fail;
2465
2466         if (current_trace)
2467                 *iter->trace = *current_trace;
2468
2469         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2470                 goto fail;
2471
2472         if (current_trace && current_trace->print_max)
2473                 iter->tr = &max_tr;
2474         else
2475                 iter->tr = &global_trace;
2476         iter->pos = -1;
2477         mutex_init(&iter->mutex);
2478         iter->cpu_file = cpu_file;
2479
2480         /* Notify the tracer early, before we stop tracing. */
2481         if (iter->trace && iter->trace->open)
2482                 iter->trace->open(iter);
2483
2484         /* Annotate start of buffers if we had overruns */
2485         if (ring_buffer_overruns(iter->tr->buffer))
2486                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2487
2488         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2489         if (trace_clocks[trace_clock_id].in_ns)
2490                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2491
2492         /* stop the trace while dumping */
2493         tracing_stop();
2494
2495         if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2496                 for_each_tracing_cpu(cpu) {
2497                         iter->buffer_iter[cpu] =
2498                                 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2499                 }
2500                 ring_buffer_read_prepare_sync();
2501                 for_each_tracing_cpu(cpu) {
2502                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2503                         tracing_iter_reset(iter, cpu);
2504                 }
2505         } else {
2506                 cpu = iter->cpu_file;
2507                 iter->buffer_iter[cpu] =
2508                         ring_buffer_read_prepare(iter->tr->buffer, cpu);
2509                 ring_buffer_read_prepare_sync();
2510                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2511                 tracing_iter_reset(iter, cpu);
2512         }
2513
2514         mutex_unlock(&trace_types_lock);
2515
2516         return iter;
2517
2518  fail:
2519         mutex_unlock(&trace_types_lock);
2520         kfree(iter->trace);
2521         kfree(iter->buffer_iter);
2522 release:
2523         seq_release_private(inode, file);
2524         return ERR_PTR(-ENOMEM);
2525 }
2526
2527 int tracing_open_generic(struct inode *inode, struct file *filp)
2528 {
2529         if (tracing_disabled)
2530                 return -ENODEV;
2531
2532         filp->private_data = inode->i_private;
2533         return 0;
2534 }
2535
2536 static int tracing_release(struct inode *inode, struct file *file)
2537 {
2538         struct seq_file *m = file->private_data;
2539         struct trace_iterator *iter;
2540         int cpu;
2541
2542         if (!(file->f_mode & FMODE_READ))
2543                 return 0;
2544
2545         iter = m->private;
2546
2547         mutex_lock(&trace_types_lock);
2548         for_each_tracing_cpu(cpu) {
2549                 if (iter->buffer_iter[cpu])
2550                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2551         }
2552
2553         if (iter->trace && iter->trace->close)
2554                 iter->trace->close(iter);
2555
2556         /* reenable tracing if it was previously enabled */
2557         tracing_start();
2558         mutex_unlock(&trace_types_lock);
2559
2560         mutex_destroy(&iter->mutex);
2561         free_cpumask_var(iter->started);
2562         kfree(iter->trace);
2563         kfree(iter->buffer_iter);
2564         seq_release_private(inode, file);
2565         return 0;
2566 }
2567
2568 static int tracing_open(struct inode *inode, struct file *file)
2569 {
2570         struct trace_iterator *iter;
2571         int ret = 0;
2572
2573         /* If this file was opened for write, then erase its contents */
2574         if ((file->f_mode & FMODE_WRITE) &&
2575             (file->f_flags & O_TRUNC)) {
2576                 long cpu = (long) inode->i_private;
2577
2578                 if (cpu == TRACE_PIPE_ALL_CPU)
2579                         tracing_reset_online_cpus(&global_trace);
2580                 else
2581                         tracing_reset(&global_trace, cpu);
2582         }
2583
2584         if (file->f_mode & FMODE_READ) {
2585                 iter = __tracing_open(inode, file);
2586                 if (IS_ERR(iter))
2587                         ret = PTR_ERR(iter);
2588                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2589                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
2590         }
2591         return ret;
2592 }
2593
2594 static void *
2595 t_next(struct seq_file *m, void *v, loff_t *pos)
2596 {
2597         struct tracer *t = v;
2598
2599         (*pos)++;
2600
2601         if (t)
2602                 t = t->next;
2603
2604         return t;
2605 }
2606
2607 static void *t_start(struct seq_file *m, loff_t *pos)
2608 {
2609         struct tracer *t;
2610         loff_t l = 0;
2611
2612         mutex_lock(&trace_types_lock);
2613         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2614                 ;
2615
2616         return t;
2617 }
2618
2619 static void t_stop(struct seq_file *m, void *p)
2620 {
2621         mutex_unlock(&trace_types_lock);
2622 }
2623
2624 static int t_show(struct seq_file *m, void *v)
2625 {
2626         struct tracer *t = v;
2627
2628         if (!t)
2629                 return 0;
2630
2631         seq_printf(m, "%s", t->name);
2632         if (t->next)
2633                 seq_putc(m, ' ');
2634         else
2635                 seq_putc(m, '\n');
2636
2637         return 0;
2638 }
2639
2640 static const struct seq_operations show_traces_seq_ops = {
2641         .start          = t_start,
2642         .next           = t_next,
2643         .stop           = t_stop,
2644         .show           = t_show,
2645 };
2646
2647 static int show_traces_open(struct inode *inode, struct file *file)
2648 {
2649         if (tracing_disabled)
2650                 return -ENODEV;
2651
2652         return seq_open(file, &show_traces_seq_ops);
2653 }
2654
2655 static ssize_t
2656 tracing_write_stub(struct file *filp, const char __user *ubuf,
2657                    size_t count, loff_t *ppos)
2658 {
2659         return count;
2660 }
2661
2662 static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
2663 {
2664         if (file->f_mode & FMODE_READ)
2665                 return seq_lseek(file, offset, origin);
2666         else
2667                 return 0;
2668 }
2669
2670 static const struct file_operations tracing_fops = {
2671         .open           = tracing_open,
2672         .read           = seq_read,
2673         .write          = tracing_write_stub,
2674         .llseek         = tracing_seek,
2675         .release        = tracing_release,
2676 };
2677
2678 static const struct file_operations show_traces_fops = {
2679         .open           = show_traces_open,
2680         .read           = seq_read,
2681         .release        = seq_release,
2682         .llseek         = seq_lseek,
2683 };
2684
2685 /*
2686  * Only trace on a CPU if the bitmask is set:
2687  */
2688 static cpumask_var_t tracing_cpumask;
2689
2690 /*
2691  * The tracer itself will not take this lock, but still we want
2692  * to provide a consistent cpumask to user-space:
2693  */
2694 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2695
2696 /*
2697  * Temporary storage for the character representation of the
2698  * CPU bitmask (and one more byte for the newline):
2699  */
2700 static char mask_str[NR_CPUS + 1];
2701
2702 static ssize_t
2703 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2704                      size_t count, loff_t *ppos)
2705 {
2706         int len;
2707
2708         mutex_lock(&tracing_cpumask_update_lock);
2709
2710         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2711         if (count - len < 2) {
2712                 count = -EINVAL;
2713                 goto out_err;
2714         }
2715         len += sprintf(mask_str + len, "\n");
2716         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2717
2718 out_err:
2719         mutex_unlock(&tracing_cpumask_update_lock);
2720
2721         return count;
2722 }
2723
2724 static ssize_t
2725 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2726                       size_t count, loff_t *ppos)
2727 {
2728         int err, cpu;
2729         cpumask_var_t tracing_cpumask_new;
2730
2731         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2732                 return -ENOMEM;
2733
2734         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2735         if (err)
2736                 goto err_unlock;
2737
2738         mutex_lock(&tracing_cpumask_update_lock);
2739
2740         local_irq_disable();
2741         arch_spin_lock(&ftrace_max_lock);
2742         for_each_tracing_cpu(cpu) {
2743                 /*
2744                  * Increase/decrease the disabled counter if we are
2745                  * about to flip a bit in the cpumask:
2746                  */
2747                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2748                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2749                         atomic_inc(&global_trace.data[cpu]->disabled);
2750                         ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
2751                 }
2752                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2753                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2754                         atomic_dec(&global_trace.data[cpu]->disabled);
2755                         ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
2756                 }
2757         }
2758         arch_spin_unlock(&ftrace_max_lock);
2759         local_irq_enable();
2760
2761         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2762
2763         mutex_unlock(&tracing_cpumask_update_lock);
2764         free_cpumask_var(tracing_cpumask_new);
2765
2766         return count;
2767
2768 err_unlock:
2769         free_cpumask_var(tracing_cpumask_new);
2770
2771         return err;
2772 }
2773
2774 static const struct file_operations tracing_cpumask_fops = {
2775         .open           = tracing_open_generic,
2776         .read           = tracing_cpumask_read,
2777         .write          = tracing_cpumask_write,
2778         .llseek         = generic_file_llseek,
2779 };
2780
2781 static int tracing_trace_options_show(struct seq_file *m, void *v)
2782 {
2783         struct tracer_opt *trace_opts;
2784         u32 tracer_flags;
2785         int i;
2786
2787         mutex_lock(&trace_types_lock);
2788         tracer_flags = current_trace->flags->val;
2789         trace_opts = current_trace->flags->opts;
2790
2791         for (i = 0; trace_options[i]; i++) {
2792                 if (trace_flags & (1 << i))
2793                         seq_printf(m, "%s\n", trace_options[i]);
2794                 else
2795                         seq_printf(m, "no%s\n", trace_options[i]);
2796         }
2797
2798         for (i = 0; trace_opts[i].name; i++) {
2799                 if (tracer_flags & trace_opts[i].bit)
2800                         seq_printf(m, "%s\n", trace_opts[i].name);
2801                 else
2802                         seq_printf(m, "no%s\n", trace_opts[i].name);
2803         }
2804         mutex_unlock(&trace_types_lock);
2805
2806         return 0;
2807 }
2808
2809 static int __set_tracer_option(struct tracer *trace,
2810                                struct tracer_flags *tracer_flags,
2811                                struct tracer_opt *opts, int neg)
2812 {
2813         int ret;
2814
2815         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2816         if (ret)
2817                 return ret;
2818
2819         if (neg)
2820                 tracer_flags->val &= ~opts->bit;
2821         else
2822                 tracer_flags->val |= opts->bit;
2823         return 0;
2824 }
2825
2826 /* Try to assign a tracer specific option */
2827 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2828 {
2829         struct tracer_flags *tracer_flags = trace->flags;
2830         struct tracer_opt *opts = NULL;
2831         int i;
2832
2833         for (i = 0; tracer_flags->opts[i].name; i++) {
2834                 opts = &tracer_flags->opts[i];
2835
2836                 if (strcmp(cmp, opts->name) == 0)
2837                         return __set_tracer_option(trace, trace->flags,
2838                                                    opts, neg);
2839         }
2840
2841         return -EINVAL;
2842 }
2843
2844 static void set_tracer_flags(unsigned int mask, int enabled)
2845 {
2846         /* do nothing if flag is already set */
2847         if (!!(trace_flags & mask) == !!enabled)
2848                 return;
2849
2850         if (enabled)
2851                 trace_flags |= mask;
2852         else
2853                 trace_flags &= ~mask;
2854
2855         if (mask == TRACE_ITER_RECORD_CMD)
2856                 trace_event_enable_cmd_record(enabled);
2857
2858         if (mask == TRACE_ITER_OVERWRITE)
2859                 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2860
2861         if (mask == TRACE_ITER_PRINTK)
2862                 trace_printk_start_stop_comm(enabled);
2863 }
2864
2865 static int trace_set_options(char *option)
2866 {
2867         char *cmp;
2868         int neg = 0;
2869         int ret = 0;
2870         int i;
2871
2872         cmp = strstrip(option);
2873
2874         if (strncmp(cmp, "no", 2) == 0) {
2875                 neg = 1;
2876                 cmp += 2;
2877         }
2878
2879         for (i = 0; trace_options[i]; i++) {
2880                 if (strcmp(cmp, trace_options[i]) == 0) {
2881                         set_tracer_flags(1 << i, !neg);
2882                         break;
2883                 }
2884         }
2885
2886         /* If no option could be set, test the specific tracer options */
2887         if (!trace_options[i]) {
2888                 mutex_lock(&trace_types_lock);
2889                 ret = set_tracer_option(current_trace, cmp, neg);
2890                 mutex_unlock(&trace_types_lock);
2891         }
2892
2893         return ret;
2894 }
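
/*
 * A worked example of the option parsing above (option name chosen for
 * illustration): writing "print-parent" matches trace_options[i] for some
 * index i and calls set_tracer_flags(1 << i, 1); writing "noprint-parent"
 * strips the leading "no", sets the local neg flag, and calls
 * set_tracer_flags(1 << i, 0) for the same bit.  A string that matches no
 * generic option falls through to set_tracer_option() so the current
 * tracer's private flags get a chance to claim it.
 */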
2895
2896 static ssize_t
2897 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2898                         size_t cnt, loff_t *ppos)
2899 {
2900         char buf[64];
2901
2902         if (cnt >= sizeof(buf))
2903                 return -EINVAL;
2904
2905         if (copy_from_user(&buf, ubuf, cnt))
2906                 return -EFAULT;
2907
2908         buf[cnt] = 0;
2909
2910         trace_set_options(buf);
2911
2912         *ppos += cnt;
2913
2914         return cnt;
2915 }
2916
2917 static int tracing_trace_options_open(struct inode *inode, struct file *file)
2918 {
2919         if (tracing_disabled)
2920                 return -ENODEV;
2921         return single_open(file, tracing_trace_options_show, NULL);
2922 }
2923
2924 static const struct file_operations tracing_iter_fops = {
2925         .open           = tracing_trace_options_open,
2926         .read           = seq_read,
2927         .llseek         = seq_lseek,
2928         .release        = single_release,
2929         .write          = tracing_trace_options_write,
2930 };
2931
2932 static const char readme_msg[] =
2933         "tracing mini-HOWTO:\n\n"
2934         "# mount -t debugfs nodev /sys/kernel/debug\n\n"
2935         "# cat /sys/kernel/debug/tracing/available_tracers\n"
2936         "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n"
2937         "# cat /sys/kernel/debug/tracing/current_tracer\n"
2938         "nop\n"
2939         "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n"
2940         "# cat /sys/kernel/debug/tracing/current_tracer\n"
2941         "wakeup\n"
2942         "# cat /sys/kernel/debug/tracing/trace_options\n"
2943         "noprint-parent nosym-offset nosym-addr noverbose\n"
2944         "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
2945         "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n"
2946         "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
2947         "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n"
2948 ;
2949
2950 static ssize_t
2951 tracing_readme_read(struct file *filp, char __user *ubuf,
2952                        size_t cnt, loff_t *ppos)
2953 {
2954         return simple_read_from_buffer(ubuf, cnt, ppos,
2955                                         readme_msg, strlen(readme_msg));
2956 }
2957
2958 static const struct file_operations tracing_readme_fops = {
2959         .open           = tracing_open_generic,
2960         .read           = tracing_readme_read,
2961         .llseek         = generic_file_llseek,
2962 };
2963
2964 static ssize_t
2965 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2966                                 size_t cnt, loff_t *ppos)
2967 {
2968         char *buf_comm;
2969         char *file_buf;
2970         char *buf;
2971         int len = 0;
2972         int pid;
2973         int i;
2974
2975         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2976         if (!file_buf)
2977                 return -ENOMEM;
2978
2979         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2980         if (!buf_comm) {
2981                 kfree(file_buf);
2982                 return -ENOMEM;
2983         }
2984
2985         buf = file_buf;
2986
2987         for (i = 0; i < SAVED_CMDLINES; i++) {
2988                 int r;
2989
2990                 pid = map_cmdline_to_pid[i];
2991                 if (pid == -1 || pid == NO_CMDLINE_MAP)
2992                         continue;
2993
2994                 trace_find_cmdline(pid, buf_comm);
2995                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
2996                 buf += r;
2997                 len += r;
2998         }
2999
3000         len = simple_read_from_buffer(ubuf, cnt, ppos,
3001                                       file_buf, len);
3002
3003         kfree(file_buf);
3004         kfree(buf_comm);
3005
3006         return len;
3007 }
3008
3009 static const struct file_operations tracing_saved_cmdlines_fops = {
3010         .open           = tracing_open_generic,
3011         .read           = tracing_saved_cmdlines_read,
3012         .llseek         = generic_file_llseek,
3013 };
3014
3015 static ssize_t
3016 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3017                        size_t cnt, loff_t *ppos)
3018 {
3019         char buf[MAX_TRACER_SIZE+2];
3020         int r;
3021
3022         mutex_lock(&trace_types_lock);
3023         if (current_trace)
3024                 r = sprintf(buf, "%s\n", current_trace->name);
3025         else
3026                 r = sprintf(buf, "\n");
3027         mutex_unlock(&trace_types_lock);
3028
3029         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3030 }
3031
3032 int tracer_init(struct tracer *t, struct trace_array *tr)
3033 {
3034         tracing_reset_online_cpus(tr);
3035         return t->init(tr);
3036 }
3037
3038 static void set_buffer_entries(struct trace_array *tr, unsigned long val)
3039 {
3040         int cpu;
3041         for_each_tracing_cpu(cpu)
3042                 tr->data[cpu]->entries = val;
3043 }
3044
3045 /* resize @tr's buffer to the size of @size_tr's entries */
3046 static int resize_buffer_duplicate_size(struct trace_array *tr,
3047                                         struct trace_array *size_tr, int cpu_id)
3048 {
3049         int cpu, ret = 0;
3050
3051         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3052                 for_each_tracing_cpu(cpu) {
3053                         ret = ring_buffer_resize(tr->buffer,
3054                                         size_tr->data[cpu]->entries, cpu);
3055                         if (ret < 0)
3056                                 break;
3057                         tr->data[cpu]->entries = size_tr->data[cpu]->entries;
3058                 }
3059         } else {
3060                 ret = ring_buffer_resize(tr->buffer,
3061                                         size_tr->data[cpu_id]->entries, cpu_id);
3062                 if (ret == 0)
3063                         tr->data[cpu_id]->entries =
3064                                 size_tr->data[cpu_id]->entries;
3065         }
3066
3067         return ret;
3068 }
3069
3070 static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
3071 {
3072         int ret;
3073
3074         /*
3075          * If the kernel or the user changes the size of the ring buffer,
3076          * we use the size that was given and can forget about
3077          * expanding it later.
3078          */
3079         ring_buffer_expanded = 1;
3080
3081         /* May be called before buffers are initialized */
3082         if (!global_trace.buffer)
3083                 return 0;
3084
3085         ret = ring_buffer_resize(global_trace.buffer, size, cpu);
3086         if (ret < 0)
3087                 return ret;
3088
3089         if (!current_trace->use_max_tr)
3090                 goto out;
3091
3092         ret = ring_buffer_resize(max_tr.buffer, size, cpu);
3093         if (ret < 0) {
3094                 int r = resize_buffer_duplicate_size(&global_trace,
3095                                                      &global_trace, cpu);
3096                 if (r < 0) {
3097                         /*
3098                          * AARGH! We are left with a differently
3099                          * sized max buffer!
3100                          * The max buffer is our "snapshot" buffer.
3101                          * When a tracer needs a snapshot (one of the
3102                          * latency tracers), it swaps the max buffer
3103                          * with the saved snapshot. We succeeded in
3104                          * updating the size of the main buffer, but
3105                          * failed to update the size of the max buffer.
3106                          * Then, when we tried to reset the main buffer
3107                          * to its original size, we failed there too.
3108                          * This is very unlikely to happen, but if it
3109                          * does, warn and kill all tracing.
3110                          */
3111                         WARN_ON(1);
3112                         tracing_disabled = 1;
3113                 }
3114                 return ret;
3115         }
3116
3117         if (cpu == RING_BUFFER_ALL_CPUS)
3118                 set_buffer_entries(&max_tr, size);
3119         else
3120                 max_tr.data[cpu]->entries = size;
3121
3122  out:
3123         if (cpu == RING_BUFFER_ALL_CPUS)
3124                 set_buffer_entries(&global_trace, size);
3125         else
3126                 global_trace.data[cpu]->entries = size;
3127
3128         return ret;
3129 }
3130
3131 static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
3132 {
3133         int ret = size;
3134
3135         mutex_lock(&trace_types_lock);
3136
3137         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3138                 /* make sure this cpu is enabled in the mask */
3139                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3140                         ret = -EINVAL;
3141                         goto out;
3142                 }
3143         }
3144
3145         ret = __tracing_resize_ring_buffer(size, cpu_id);
3146         if (ret < 0)
3147                 ret = -ENOMEM;
3148
3149 out:
3150         mutex_unlock(&trace_types_lock);
3151
3152         return ret;
3153 }
3154
3155
3156 /**
3157  * tracing_update_buffers - used by tracing facility to expand ring buffers
3158  *
3159  * To save memory when tracing is never used on a system that has it
3160  * configured in, the ring buffers are set to a minimum size. But once
3161  * a user starts to use the tracing facility, the buffers need to grow
3162  * to their default size.
3163  *
3164  * This function is to be called when a tracer is about to be used.
3165  */
3166 int tracing_update_buffers(void)
3167 {
3168         int ret = 0;
3169
3170         mutex_lock(&trace_types_lock);
3171         if (!ring_buffer_expanded)
3172                 ret = __tracing_resize_ring_buffer(trace_buf_size,
3173                                                 RING_BUFFER_ALL_CPUS);
3174         mutex_unlock(&trace_types_lock);
3175
3176         return ret;
3177 }
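
/*
 * For illustration: the expected calling pattern is to invoke this once,
 * right before tracing output is about to be generated, and to treat a
 * negative return as "the buffers could not be expanded":
 *
 *	if (tracing_update_buffers() < 0)
 *		return -ENOMEM;
 *
 * trace_printk_init_buffers() earlier in this file relies on the same call
 * to bring the buffers up to size before trace_printk() data starts
 * landing in them.
 */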
3178
3179 struct trace_option_dentry;
3180
3181 static struct trace_option_dentry *
3182 create_trace_option_files(struct tracer *tracer);
3183
3184 static void
3185 destroy_trace_option_files(struct trace_option_dentry *topts);
3186
3187 static int tracing_set_tracer(const char *buf)
3188 {
3189         static struct trace_option_dentry *topts;
3190         struct trace_array *tr = &global_trace;
3191         struct tracer *t;
3192         bool had_max_tr;
3193         int ret = 0;
3194
3195         mutex_lock(&trace_types_lock);
3196
3197         if (!ring_buffer_expanded) {
3198                 ret = __tracing_resize_ring_buffer(trace_buf_size,
3199                                                 RING_BUFFER_ALL_CPUS);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = 0;
3203         }
3204
3205         for (t = trace_types; t; t = t->next) {
3206                 if (strcmp(t->name, buf) == 0)
3207                         break;
3208         }
3209         if (!t) {
3210                 ret = -EINVAL;
3211                 goto out;
3212         }
3213         if (t == current_trace)
3214                 goto out;
3215
3216         trace_branch_disable();
3217         if (current_trace && current_trace->reset)
3218                 current_trace->reset(tr);
3219
3220         had_max_tr = current_trace && current_trace->use_max_tr;
3221         current_trace = &nop_trace;
3222
3223         if (had_max_tr && !t->use_max_tr) {
3224                 /*
3225                  * We need to make sure that update_max_tr() sees that
3226                  * current_trace changed to nop_trace to keep it from
3227                  * swapping the buffers after we resize it.
3228                  * update_max_tr() is called with interrupts disabled,
3229                  * so a synchronize_sched() is sufficient.
3230                  */
3231                 synchronize_sched();
3232                 /*
3233                  * We don't free the ring buffer; instead, we resize it
3234                  * because the max_tr ring buffer has some state (e.g.
3235                  * ring->clock) and we want to preserve it.
3236                  */
3237                 ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
3238                 set_buffer_entries(&max_tr, 1);
3239         }
3240         destroy_trace_option_files(topts);
3241
3242         topts = create_trace_option_files(t);
3243         if (t->use_max_tr && !had_max_tr) {
3244                 /* we need to make per cpu buffer sizes equivalent */
3245                 ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
3246                                                    RING_BUFFER_ALL_CPUS);
3247                 if (ret < 0)
3248                         goto out;
3249         }
3250
3251         if (t->init) {
3252                 ret = tracer_init(t, tr);
3253                 if (ret)
3254                         goto out;
3255         }
3256
3257         current_trace = t;
3258         trace_branch_enable(tr);
3259  out:
3260         mutex_unlock(&trace_types_lock);
3261
3262         return ret;
3263 }
3264
3265 static ssize_t
3266 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3267                         size_t cnt, loff_t *ppos)
3268 {
3269         char buf[MAX_TRACER_SIZE+1];
3270         int i;
3271         size_t ret;
3272         int err;
3273
3274         ret = cnt;
3275
3276         if (cnt > MAX_TRACER_SIZE)
3277                 cnt = MAX_TRACER_SIZE;
3278
3279         if (copy_from_user(&buf, ubuf, cnt))
3280                 return -EFAULT;
3281
3282         buf[cnt] = 0;
3283
3284         /* strip trailing whitespace */
3285         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3286                 buf[i] = 0;
3287
3288         err = tracing_set_tracer(buf);
3289         if (err)
3290                 return err;
3291
3292         *ppos += ret;
3293
3294         return ret;
3295 }
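/*
 * Illustrative usage of the "current_tracer" file backed by the write
 * handler above (a sketch; it assumes debugfs is mounted at
 * /sys/kernel/debug and that the "nop" tracer is registered):
 *
 *      echo nop > /sys/kernel/debug/tracing/current_tracer
 *      cat /sys/kernel/debug/tracing/current_tracer
 *
 * Trailing whitespace (such as the newline added by echo) is stripped
 * before the name is looked up in the trace_types list.
 */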
3296
3297 static ssize_t
3298 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3299                      size_t cnt, loff_t *ppos)
3300 {
3301         unsigned long *ptr = filp->private_data;
3302         char buf[64];
3303         int r;
3304
3305         r = snprintf(buf, sizeof(buf), "%ld\n",
3306                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3307         if (r > sizeof(buf))
3308                 r = sizeof(buf);
3309         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3310 }
3311
3312 static ssize_t
3313 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3314                       size_t cnt, loff_t *ppos)
3315 {
3316         unsigned long *ptr = filp->private_data;
3317         unsigned long val;
3318         int ret;
3319
3320         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3321         if (ret)
3322                 return ret;
3323
3324         *ptr = val * 1000;
3325
3326         return cnt;
3327 }
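/*
 * The two handlers above back the "tracing_max_latency" and
 * "tracing_thresh" files. Values are shown to and taken from userspace in
 * microseconds, while the stored value is in nanoseconds. Illustrative
 * usage (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *      cat /sys/kernel/debug/tracing/tracing_max_latency
 *      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 */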
3328
3329 static int tracing_open_pipe(struct inode *inode, struct file *filp)
3330 {
3331         long cpu_file = (long) inode->i_private;
3332         struct trace_iterator *iter;
3333         int ret = 0;
3334
3335         if (tracing_disabled)
3336                 return -ENODEV;
3337
3338         mutex_lock(&trace_types_lock);
3339
3340         /* create a buffer to store the information to pass to userspace */
3341         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3342         if (!iter) {
3343                 ret = -ENOMEM;
3344                 goto out;
3345         }
3346
3347         /*
3348          * We make a copy of the current tracer to avoid concurrent
3349          * changes to it while we are reading.
3350          */
3351         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
3352         if (!iter->trace) {
3353                 ret = -ENOMEM;
3354                 goto fail;
3355         }
3356         if (current_trace)
3357                 *iter->trace = *current_trace;
3358
3359         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
3360                 ret = -ENOMEM;
3361                 goto fail;
3362         }
3363
3364         /* trace pipe does not show start of buffer */
3365         cpumask_setall(iter->started);
3366
3367         if (trace_flags & TRACE_ITER_LATENCY_FMT)
3368                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3369
3370         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3371         if (trace_clocks[trace_clock_id].in_ns)
3372                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3373
3374         iter->cpu_file = cpu_file;
3375         iter->tr = &global_trace;
3376         mutex_init(&iter->mutex);
3377         filp->private_data = iter;
3378
3379         if (iter->trace->pipe_open)
3380                 iter->trace->pipe_open(iter);
3381
3382         nonseekable_open(inode, filp);
3383 out:
3384         mutex_unlock(&trace_types_lock);
3385         return ret;
3386
3387 fail:
3388         kfree(iter->trace);
3389         kfree(iter);
3390         mutex_unlock(&trace_types_lock);
3391         return ret;
3392 }
3393
3394 static int tracing_release_pipe(struct inode *inode, struct file *file)
3395 {
3396         struct trace_iterator *iter = file->private_data;
3397
3398         mutex_lock(&trace_types_lock);
3399
3400         if (iter->trace->pipe_close)
3401                 iter->trace->pipe_close(iter);
3402
3403         mutex_unlock(&trace_types_lock);
3404
3405         free_cpumask_var(iter->started);
3406         mutex_destroy(&iter->mutex);
3407         kfree(iter->trace);
3408         kfree(iter);
3409
3410         return 0;
3411 }
3412
3413 static unsigned int
3414 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3415 {
3416         struct trace_iterator *iter = filp->private_data;
3417
3418         if (trace_flags & TRACE_ITER_BLOCK) {
3419                 /*
3420                  * Always select as readable when in blocking mode
3421                  */
3422                 return POLLIN | POLLRDNORM;
3423         } else {
3424                 if (!trace_empty(iter))
3425                         return POLLIN | POLLRDNORM;
3426                 poll_wait(filp, &trace_wait, poll_table);
3427                 if (!trace_empty(iter))
3428                         return POLLIN | POLLRDNORM;
3429
3430                 return 0;
3431         }
3432 }
3433
3434 /*
3435  * This is a makeshift waitqueue.
3436  * A tracer might use this callback in some rare cases:
3437  *
3438  *  1) the current tracer might hold the runqueue lock when it wakes up
3439  *     a reader, hence a deadlock (sched, function, and function graph tracers)
3440  *  2) the function tracers trace all functions, and we don't want
3441  *     the overhead of calling wake_up and friends
3442  *     (and tracing them too)
3443  *
3444  *     Anyway, this is a really primitive wakeup.
3445  */
3446 void poll_wait_pipe(struct trace_iterator *iter)
3447 {
3448         set_current_state(TASK_INTERRUPTIBLE);
3449         /* sleep for 100 msecs, and try again. */
3450         schedule_timeout(HZ / 10);
3451 }
3452
3453 /* Must be called with trace_types_lock mutex held. */
3454 static int tracing_wait_pipe(struct file *filp)
3455 {
3456         struct trace_iterator *iter = filp->private_data;
3457
3458         while (trace_empty(iter)) {
3459
3460                 if ((filp->f_flags & O_NONBLOCK)) {
3461                         return -EAGAIN;
3462                 }
3463
3464                 mutex_unlock(&iter->mutex);
3465
3466                 iter->trace->wait_pipe(iter);
3467
3468                 mutex_lock(&iter->mutex);
3469
3470                 if (signal_pending(current))
3471                         return -EINTR;
3472
3473                 /*
3474                  * We block until we have read something and tracing has been
3475                  * disabled. We still block if tracing is disabled but we have
3476                  * never read anything. This allows a user to cat this file, and
3477                  * then enable tracing. But after we have read something,
3478                  * we give an EOF when tracing is disabled again.
3479                  *
3480                  * iter->pos will be 0 if we haven't read anything.
3481                  */
3482                 if (!tracing_is_enabled() && iter->pos)
3483                         break;
3484         }
3485
3486         return 1;
3487 }
3488
3489 /*
3490  * Consumer reader.
3491  */
3492 static ssize_t
3493 tracing_read_pipe(struct file *filp, char __user *ubuf,
3494                   size_t cnt, loff_t *ppos)
3495 {
3496         struct trace_iterator *iter = filp->private_data;
3497         static struct tracer *old_tracer;
3498         ssize_t sret;
3499
3500         /* return any leftover data */
3501         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3502         if (sret != -EBUSY)
3503                 return sret;
3504
3505         trace_seq_init(&iter->seq);
3506
3507         /* copy the tracer to avoid using a global lock all around */
3508         mutex_lock(&trace_types_lock);
3509         if (unlikely(old_tracer != current_trace && current_trace)) {
3510                 old_tracer = current_trace;
3511                 *iter->trace = *current_trace;
3512         }
3513         mutex_unlock(&trace_types_lock);
3514
3515         /*
3516          * Avoid more than one consumer on a single file descriptor.
3517          * This is just a matter of trace coherency: the ring buffer itself
3518          * is protected.
3519          */
3520         mutex_lock(&iter->mutex);
3521         if (iter->trace->read) {
3522                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
3523                 if (sret)
3524                         goto out;
3525         }
3526
3527 waitagain:
3528         sret = tracing_wait_pipe(filp);
3529         if (sret <= 0)
3530                 goto out;
3531
3532         /* stop when tracing is finished */
3533         if (trace_empty(iter)) {
3534                 sret = 0;
3535                 goto out;
3536         }
3537
3538         if (cnt >= PAGE_SIZE)
3539                 cnt = PAGE_SIZE - 1;
3540
3541         /* reset all but tr, trace, and overruns */
3542         memset(&iter->seq, 0,
3543                sizeof(struct trace_iterator) -
3544                offsetof(struct trace_iterator, seq));
3545         iter->pos = -1;
3546
3547         trace_event_read_lock();
3548         trace_access_lock(iter->cpu_file);
3549         while (trace_find_next_entry_inc(iter) != NULL) {
3550                 enum print_line_t ret;
3551                 int len = iter->seq.len;
3552
3553                 ret = print_trace_line(iter);
3554                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3555                         /* don't print partial lines */
3556                         iter->seq.len = len;
3557                         break;
3558                 }
3559                 if (ret != TRACE_TYPE_NO_CONSUME)
3560                         trace_consume(iter);
3561
3562                 if (iter->seq.len >= cnt)
3563                         break;
3564
3565                 /*
3566                  * Setting the full flag means we reached the trace_seq buffer
3567                  * size and should have left via the partial-output condition
3568                  * above; one of the trace_seq_* functions is not being used properly.
3569                  */
3570                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
3571                           iter->ent->type);
3572         }
3573         trace_access_unlock(iter->cpu_file);
3574         trace_event_read_unlock();
3575
3576         /* Now copy what we have to the user */
3577         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3578         if (iter->seq.readpos >= iter->seq.len)
3579                 trace_seq_init(&iter->seq);
3580
3581         /*
3582          * If there was nothing to send to user, in spite of consuming trace
3583          * entries, go back to wait for more entries.
3584          */
3585         if (sret == -EBUSY)
3586                 goto waitagain;
3587
3588 out:
3589         mutex_unlock(&iter->mutex);
3590
3591         return sret;
3592 }
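/*
 * The consumer read above backs the "trace_pipe" file: entries are removed
 * from the ring buffer as they are read, and an empty buffer makes the read
 * block (unless O_NONBLOCK is set, in which case -EAGAIN is returned) until
 * more data arrives. Illustrative usage (assuming debugfs is mounted at
 * /sys/kernel/debug):
 *
 *      cat /sys/kernel/debug/tracing/trace_pipe
 */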
3593
3594 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
3595                                      struct pipe_buffer *buf)
3596 {
3597         __free_page(buf->page);
3598 }
3599
3600 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3601                                      unsigned int idx)
3602 {
3603         __free_page(spd->pages[idx]);
3604 }
3605
3606 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3607         .can_merge              = 0,
3608         .map                    = generic_pipe_buf_map,
3609         .unmap                  = generic_pipe_buf_unmap,
3610         .confirm                = generic_pipe_buf_confirm,
3611         .release                = tracing_pipe_buf_release,
3612         .steal                  = generic_pipe_buf_steal,
3613         .get                    = generic_pipe_buf_get,
3614 };
3615
3616 static size_t
3617 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3618 {
3619         size_t count;
3620         int ret;
3621
3622         /* Seq buffer is page-sized, exactly what we need. */
3623         for (;;) {
3624                 count = iter->seq.len;
3625                 ret = print_trace_line(iter);
3626                 count = iter->seq.len - count;
3627                 if (rem < count) {
3628                         rem = 0;
3629                         iter->seq.len -= count;
3630                         break;
3631                 }
3632                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3633                         iter->seq.len -= count;
3634                         break;
3635                 }
3636
3637                 if (ret != TRACE_TYPE_NO_CONSUME)
3638                         trace_consume(iter);
3639                 rem -= count;
3640                 if (!trace_find_next_entry_inc(iter))   {
3641                         rem = 0;
3642                         iter->ent = NULL;
3643                         break;
3644                 }
3645         }
3646
3647         return rem;
3648 }
3649
3650 static ssize_t tracing_splice_read_pipe(struct file *filp,
3651                                         loff_t *ppos,
3652                                         struct pipe_inode_info *pipe,
3653                                         size_t len,
3654                                         unsigned int flags)
3655 {
3656         struct page *pages_def[PIPE_DEF_BUFFERS];
3657         struct partial_page partial_def[PIPE_DEF_BUFFERS];
3658         struct trace_iterator *iter = filp->private_data;
3659         struct splice_pipe_desc spd = {
3660                 .pages          = pages_def,
3661                 .partial        = partial_def,
3662                 .nr_pages       = 0, /* This gets updated below. */
3663                 .nr_pages_max   = PIPE_DEF_BUFFERS,
3664                 .flags          = flags,
3665                 .ops            = &tracing_pipe_buf_ops,
3666                 .spd_release    = tracing_spd_release_pipe,
3667         };
3668         static struct tracer *old_tracer;
3669         ssize_t ret;
3670         size_t rem;
3671         unsigned int i;
3672
3673         if (splice_grow_spd(pipe, &spd))
3674                 return -ENOMEM;
3675
3676         /* copy the tracer to avoid using a global lock all around */
3677         mutex_lock(&trace_types_lock);
3678         if (unlikely(old_tracer != current_trace && current_trace)) {
3679                 old_tracer = current_trace;
3680                 *iter->trace = *current_trace;
3681         }
3682         mutex_unlock(&trace_types_lock);
3683
3684         mutex_lock(&iter->mutex);
3685
3686         if (iter->trace->splice_read) {
3687                 ret = iter->trace->splice_read(iter, filp,
3688                                                ppos, pipe, len, flags);
3689                 if (ret)
3690                         goto out_err;
3691         }
3692
3693         ret = tracing_wait_pipe(filp);
3694         if (ret <= 0)
3695                 goto out_err;
3696
3697         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
3698                 ret = -EFAULT;
3699                 goto out_err;
3700         }
3701
3702         trace_event_read_lock();
3703         trace_access_lock(iter->cpu_file);
3704
3705         /* Fill as many pages as possible. */
3706         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3707                 spd.pages[i] = alloc_page(GFP_KERNEL);
3708                 if (!spd.pages[i])
3709                         break;
3710
3711                 rem = tracing_fill_pipe_page(rem, iter);
3712
3713                 /* Copy the data into the page, so we can start over. */
3714                 ret = trace_seq_to_buffer(&iter->seq,
3715                                           page_address(spd.pages[i]),
3716                                           iter->seq.len);
3717                 if (ret < 0) {
3718                         __free_page(spd.pages[i]);
3719                         break;
3720                 }
3721                 spd.partial[i].offset = 0;
3722                 spd.partial[i].len = iter->seq.len;
3723
3724                 trace_seq_init(&iter->seq);
3725         }
3726
3727         trace_access_unlock(iter->cpu_file);
3728         trace_event_read_unlock();
3729         mutex_unlock(&iter->mutex);
3730
3731         spd.nr_pages = i;
3732
3733         ret = splice_to_pipe(pipe, &spd);
3734 out:
3735         splice_shrink_spd(&spd);
3736         return ret;
3737
3738 out_err:
3739         mutex_unlock(&iter->mutex);
3740         goto out;
3741 }
3742
3743 struct ftrace_entries_info {
3744         struct trace_array      *tr;
3745         int                     cpu;
3746 };
3747
3748 static int tracing_entries_open(struct inode *inode, struct file *filp)
3749 {
3750         struct ftrace_entries_info *info;
3751
3752         if (tracing_disabled)
3753                 return -ENODEV;
3754
3755         info = kzalloc(sizeof(*info), GFP_KERNEL);
3756         if (!info)
3757                 return -ENOMEM;
3758
3759         info->tr = &global_trace;
3760         info->cpu = (unsigned long)inode->i_private;
3761
3762         filp->private_data = info;
3763
3764         return 0;
3765 }
3766
3767 static ssize_t
3768 tracing_entries_read(struct file *filp, char __user *ubuf,
3769                      size_t cnt, loff_t *ppos)
3770 {
3771         struct ftrace_entries_info *info = filp->private_data;
3772         struct trace_array *tr = info->tr;
3773         char buf[64];
3774         int r = 0;
3775         ssize_t ret;
3776
3777         mutex_lock(&trace_types_lock);
3778
3779         if (info->cpu == RING_BUFFER_ALL_CPUS) {
3780                 int cpu, buf_size_same;
3781                 unsigned long size;
3782
3783                 size = 0;
3784                 buf_size_same = 1;
3785                 /* check if all cpu sizes are same */
3786                 for_each_tracing_cpu(cpu) {
3787                         /* fill in the size from first enabled cpu */
3788                         if (size == 0)
3789                                 size = tr->data[cpu]->entries;
3790                         if (size != tr->data[cpu]->entries) {
3791                                 buf_size_same = 0;
3792                                 break;
3793                         }
3794                 }
3795
3796                 if (buf_size_same) {
3797                         if (!ring_buffer_expanded)
3798                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
3799                                             size >> 10,
3800                                             trace_buf_size >> 10);
3801                         else
3802                                 r = sprintf(buf, "%lu\n", size >> 10);
3803                 } else
3804                         r = sprintf(buf, "X\n");
3805         } else
3806                 r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
3807
3808         mutex_unlock(&trace_types_lock);
3809
3810         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3811         return ret;
3812 }
3813
3814 static ssize_t
3815 tracing_entries_write(struct file *filp, const char __user *ubuf,
3816                       size_t cnt, loff_t *ppos)
3817 {
3818         struct ftrace_entries_info *info = filp->private_data;
3819         unsigned long val;
3820         int ret;
3821
3822         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3823         if (ret)
3824                 return ret;
3825
3826         /* must have at least 1 entry */
3827         if (!val)
3828                 return -EINVAL;
3829
3830         /* value is in KB */
3831         val <<= 10;
3832
3833         ret = tracing_resize_ring_buffer(val, info->cpu);
3834         if (ret < 0)
3835                 return ret;
3836
3837         *ppos += cnt;
3838
3839         return cnt;
3840 }
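/*
 * The handlers above back the "buffer_size_kb" files. The value is the
 * per-cpu ring buffer size in kilobytes: the top-level file acts on all
 * CPUs at once (RING_BUFFER_ALL_CPUS), while the per_cpu/cpuN/ copies act
 * on a single CPU. A read may show "(expanded: N)" while the boot-time
 * minimal buffer is still in place, or "X" when the per-cpu sizes differ.
 * Illustrative usage (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *      echo 4096 > /sys/kernel/debug/tracing/buffer_size_kb
 *      cat /sys/kernel/debug/tracing/per_cpu/cpu0/buffer_size_kb
 */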
3841
3842 static int
3843 tracing_entries_release(struct inode *inode, struct file *filp)
3844 {
3845         struct ftrace_entries_info *info = filp->private_data;
3846
3847         kfree(info);
3848
3849         return 0;
3850 }
3851
3852 static ssize_t
3853 tracing_total_entries_read(struct file *filp, char __user *ubuf,
3854                                 size_t cnt, loff_t *ppos)
3855 {
3856         struct trace_array *tr = filp->private_data;
3857         char buf[64];
3858         int r, cpu;
3859         unsigned long size = 0, expanded_size = 0;
3860
3861         mutex_lock(&trace_types_lock);
3862         for_each_tracing_cpu(cpu) {
3863                 size += tr->data[cpu]->entries >> 10;
3864                 if (!ring_buffer_expanded)
3865                         expanded_size += trace_buf_size >> 10;
3866         }
3867         if (ring_buffer_expanded)
3868                 r = sprintf(buf, "%lu\n", size);
3869         else
3870                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
3871         mutex_unlock(&trace_types_lock);
3872
3873         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3874 }
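/*
 * The read handler above backs the read-only "buffer_total_size_kb" file,
 * which reports the sum of all per-cpu buffer sizes in kilobytes, with an
 * "(expanded: N)" hint while the ring buffer has not been expanded yet.
 * Illustrative usage (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *      cat /sys/kernel/debug/tracing/buffer_total_size_kb
 */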
3875
3876 static ssize_t
3877 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
3878                           size_t cnt, loff_t *ppos)
3879 {
3880         /*
3881          * There is no need to read what the user has written; this function
3882          * is just here to make sure that there is no error when "echo" is used.
3883          */
3884
3885         *ppos += cnt;
3886
3887         return cnt;
3888 }
3889
3890 static int
3891 tracing_free_buffer_release(struct inode *inode, struct file *filp)
3892 {
3893         /* disable tracing ? */
3894         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
3895                 tracing_off();
3896         /* resize the ring buffer to 0 */
3897         tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS);
3898
3899         return 0;
3900 }
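/*
 * The two handlers above back the "free_buffer" file: the write is a no-op
 * so that "echo > free_buffer" succeeds, and the final close resizes the
 * ring buffer to 0 to release its memory (turning tracing off first when
 * TRACE_ITER_STOP_ON_FREE is set). Illustrative usage (assuming debugfs is
 * mounted at /sys/kernel/debug):
 *
 *      echo > /sys/kernel/debug/tracing/free_buffer
 */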
3901
3902 static ssize_t
3903 tracing_mark_write(struct file *filp, const char __user *ubuf,
3904                                         size_t cnt, loff_t *fpos)
3905 {
3906         unsigned long addr = (unsigned long)ubuf;
3907         struct ring_buffer_event *event;
3908         struct ring_buffer *buffer;
3909         struct print_entry *entry;
3910         unsigned long irq_flags;
3911         struct page *pages[2];
3912         void *map_page[2];
3913         int nr_pages = 1;
3914         ssize_t written;
3915         int offset;
3916         int size;
3917         int len;
3918         int ret;
3919         int i;
3920
3921         if (tracing_disabled)
3922                 return -EINVAL;
3923
3924         if (!(trace_flags & TRACE_ITER_MARKERS))
3925                 return -EINVAL;
3926
3927         if (cnt > TRACE_BUF_SIZE)
3928                 cnt = TRACE_BUF_SIZE;
3929
3930         /*
3931          * Userspace is injecting traces into the kernel trace buffer.
3932          * We want to be as non-intrusive as possible.
3933          * To do so, we do not want to allocate any special buffers
3934          * or take any locks, but instead write the userspace data
3935          * straight into the ring buffer.
3936          *
3937          * First we need to pin the userspace buffer into memory. It is most
3938          * likely already resident, since the task just referenced it, but
3939          * there is no guarantee. By using get_user_pages_fast()
3940          * and kmap_atomic/kunmap_atomic() we can get access to the
3941          * pages directly. We then write the data directly into the
3942          * ring buffer.
3943          */
3944         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
3945
3946         /* check if we cross pages */
3947         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
3948                 nr_pages = 2;
3949
3950         offset = addr & (PAGE_SIZE - 1);
3951         addr &= PAGE_MASK;
3952
3953         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
3954         if (ret < nr_pages) {
3955                 while (--ret >= 0)
3956                         put_page(pages[ret]);
3957                 written = -EFAULT;
3958                 goto out;
3959         }
3960
3961         for (i = 0; i < nr_pages; i++)
3962                 map_page[i] = kmap_atomic(pages[i]);
3963
3964         local_save_flags(irq_flags);
3965         size = sizeof(*entry) + cnt + 2; /* possible \n added */
3966         buffer = global_trace.buffer;
3967         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3968                                           irq_flags, preempt_count());
3969         if (!event) {
3970                 /* Ring buffer disabled, return as if not open for write */
3971                 written = -EBADF;
3972                 goto out_unlock;
3973         }
3974
3975         entry = ring_buffer_event_data(event);
3976         entry->ip = _THIS_IP_;
3977
3978         if (nr_pages == 2) {
3979                 len = PAGE_SIZE - offset;
3980                 memcpy(&entry->buf, map_page[0] + offset, len);
3981                 memcpy(&entry->buf[len], map_page[1], cnt - len);
3982         } else
3983                 memcpy(&entry->buf, map_page[0] + offset, cnt);
3984
3985         if (entry->buf[cnt - 1] != '\n') {
3986                 entry->buf[cnt] = '\n';
3987                 entry->buf[cnt + 1] = '\0';
3988         } else
3989                 entry->buf[cnt] = '\0';
3990
3991         __buffer_unlock_commit(buffer, event);
3992
3993         written = cnt;
3994
3995         *fpos += written;
3996
3997  out_unlock:
3998         for (i = 0; i < nr_pages; i++){
3999                 kunmap_atomic(map_page[i]);
4000                 put_page(pages[i]);
4001         }
4002  out:
4003         return written;
4004 }
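/*
 * The write handler above backs the "trace_marker" file, which lets
 * userspace inject a string straight into the ring buffer. A minimal
 * userspace sketch (assuming debugfs is mounted at /sys/kernel/debug and
 * that the TRACE_ITER_MARKERS option is set, as checked above):
 *
 *      #include <fcntl.h>
 *      #include <unistd.h>
 *
 *      int main(void)
 *      {
 *              const char msg[] = "hello from userspace\n";
 *              int fd = open("/sys/kernel/debug/tracing/trace_marker",
 *                            O_WRONLY);
 *
 *              if (fd < 0)
 *                      return 1;
 *              if (write(fd, msg, sizeof(msg) - 1) < 0)
 *                      return 1;
 *              close(fd);
 *              return 0;
 *      }
 *
 * The message shows up in the trace output as a print event; a newline is
 * appended by the kernel if the write did not end in one.
 */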
4005
4006 static int tracing_clock_show(struct seq_file *m, void *v)
4007 {
4008         int i;
4009
4010         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4011                 seq_printf(m,
4012                         "%s%s%s%s", i ? " " : "",
4013                         i == trace_clock_id ? "[" : "", trace_clocks[i].name,
4014                         i == trace_clock_id ? "]" : "");
4015         seq_putc(m, '\n');
4016
4017         return 0;
4018 }
4019
4020 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4021                                    size_t cnt, loff_t *fpos)
4022 {
4023         char buf[64];
4024         const char *clockstr;
4025         int i;
4026
4027         if (cnt >= sizeof(buf))
4028                 return -EINVAL;
4029
4030         if (copy_from_user(&buf, ubuf, cnt))
4031                 return -EFAULT;
4032
4033         buf[cnt] = 0;
4034
4035         clockstr = strstrip(buf);
4036
4037         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4038                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4039                         break;
4040         }
4041         if (i == ARRAY_SIZE(trace_clocks))
4042                 return -EINVAL;
4043
4044         trace_clock_id = i;
4045
4046         mutex_lock(&trace_types_lock);
4047
4048         ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
4049         if (max_tr.buffer)
4050                 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
4051
4052         /*
4053          * New clock may not be consistent with the previous clock.
4054          * Reset the buffer so that it doesn't have incomparable timestamps.
4055          */
4056         tracing_reset_online_cpus(&global_trace);
4057         tracing_reset_online_cpus(&max_tr);
4058
4059         mutex_unlock(&trace_types_lock);
4060
4061         *fpos += cnt;
4062
4063         return cnt;
4064 }
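/*
 * The show/write handlers above back the "trace_clock" file. A read lists
 * the available clocks with the current one in brackets; writing a clock
 * name switches the ring buffer clock and resets the buffers, since
 * timestamps taken with different clocks are not comparable. Illustrative
 * usage (assuming debugfs is mounted at /sys/kernel/debug; "local" and
 * "global" are typical clock names):
 *
 *      cat /sys/kernel/debug/tracing/trace_clock
 *      echo global > /sys/kernel/debug/tracing/trace_clock
 */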
4065
4066 static int tracing_clock_open(struct inode *inode, struct file *file)
4067 {
4068         if (tracing_disabled)
4069                 return -ENODEV;
4070         return single_open(file, tracing_clock_show, NULL);
4071 }
4072
4073 static const struct file_operations tracing_max_lat_fops = {
4074         .open           = tracing_open_generic,
4075         .read           = tracing_max_lat_read,
4076         .write          = tracing_max_lat_write,
4077         .llseek         = generic_file_llseek,
4078 };
4079
4080 static const struct file_operations set_tracer_fops = {
4081         .open           = tracing_open_generic,
4082         .read           = tracing_set_trace_read,
4083         .write          = tracing_set_trace_write,
4084         .llseek         = generic_file_llseek,
4085 };
4086
4087 static const struct file_operations tracing_pipe_fops = {
4088         .open           = tracing_open_pipe,
4089         .poll           = tracing_poll_pipe,
4090         .read           = tracing_read_pipe,
4091         .splice_read    = tracing_splice_read_pipe,
4092         .release        = tracing_release_pipe,
4093         .llseek         = no_llseek,
4094 };
4095
4096 static const struct file_operations tracing_entries_fops = {
4097         .open           = tracing_entries_open,
4098         .read           = tracing_entries_read,
4099         .write          = tracing_entries_write,
4100         .release        = tracing_entries_release,
4101         .llseek         = generic_file_llseek,
4102 };
4103
4104 static const struct file_operations tracing_total_entries_fops = {
4105         .open           = tracing_open_generic,
4106         .read           = tracing_total_entries_read,
4107         .llseek         = generic_file_llseek,
4108 };
4109
4110 static const struct file_operations tracing_free_buffer_fops = {
4111         .write          = tracing_free_buffer_write,
4112         .release        = tracing_free_buffer_release,
4113 };
4114
4115 static const struct file_operations tracing_mark_fops = {
4116         .open           = tracing_open_generic,
4117         .write          = tracing_mark_write,
4118         .llseek         = generic_file_llseek,
4119 };
4120
4121 static const struct file_operations trace_clock_fops = {
4122         .open           = tracing_clock_open,
4123         .read           = seq_read,
4124         .llseek         = seq_lseek,
4125         .release        = single_release,
4126         .write          = tracing_clock_write,
4127 };
4128
4129 struct ftrace_buffer_info {
4130         struct trace_array      *tr;
4131         void                    *spare;
4132         int                     cpu;
4133         unsigned int            read;
4134 };
4135
4136 static int tracing_buffers_open(struct inode *inode, struct file *filp)
4137 {
4138         int cpu = (int)(long)inode->i_private;
4139         struct ftrace_buffer_info *info;
4140
4141         if (tracing_disabled)
4142                 return -ENODEV;
4143
4144         info = kzalloc(sizeof(*info), GFP_KERNEL);
4145         if (!info)
4146                 return -ENOMEM;
4147
4148         info->tr        = &global_trace;
4149         info->cpu       = cpu;
4150         info->spare     = NULL;
4151         /* Force reading ring buffer for first read */
4152         info->read      = (unsigned int)-1;
4153
4154         filp->private_data = info;
4155
4156         return nonseekable_open(inode, filp);
4157 }
4158
4159 static ssize_t
4160 tracing_buffers_read(struct file *filp, char __user *ubuf,
4161                      size_t count, loff_t *ppos)
4162 {
4163         struct ftrace_buffer_info *info = filp->private_data;
4164         ssize_t ret;
4165         size_t size;
4166
4167         if (!count)
4168                 return 0;
4169
4170         if (!info->spare)
4171                 info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
4172         if (!info->spare)
4173                 return -ENOMEM;
4174
4175         /* Do we have previous read data to read? */
4176         if (info->read < PAGE_SIZE)
4177                 goto read;
4178
4179         trace_access_lock(info->cpu);
4180         ret = ring_buffer_read_page(info->tr->buffer,
4181                                     &info->spare,
4182                                     count,
4183                                     info->cpu, 0);
4184         trace_access_unlock(info->cpu);
4185         if (ret < 0)
4186                 return 0;
4187
4188         info->read = 0;
4189
4190 read:
4191         size = PAGE_SIZE - info->read;
4192         if (size > count)
4193                 size = count;
4194
4195         ret = copy_to_user(ubuf, info->spare + info->read, size);
4196         if (ret == size)
4197                 return -EFAULT;
4198         size -= ret;
4199
4200         *ppos += size;
4201         info->read += size;
4202
4203         return size;
4204 }
4205
4206 static int tracing_buffers_release(struct inode *inode, struct file *file)
4207 {
4208         struct ftrace_buffer_info *info = file->private_data;
4209
4210         if (info->spare)
4211                 ring_buffer_free_read_page(info->tr->buffer, info->spare);
4212         kfree(info);
4213
4214         return 0;
4215 }
4216
4217 struct buffer_ref {
4218         struct ring_buffer      *buffer;
4219         void                    *page;
4220         int                     ref;
4221 };
4222
4223 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
4224                                     struct pipe_buffer *buf)
4225 {
4226         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
4227
4228         if (--ref->ref)
4229                 return;
4230
4231         ring_buffer_free_read_page(ref->buffer, ref->page);
4232         kfree(ref);
4233         buf->private = 0;
4234 }
4235
4236 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
4237                                 struct pipe_buffer *buf)
4238 {
4239         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
4240
4241         ref->ref++;
4242 }
4243
4244 /* Pipe buffer operations for a buffer. */
4245 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
4246         .can_merge              = 0,
4247         .map                    = generic_pipe_buf_map,
4248         .unmap                  = generic_pipe_buf_unmap,
4249         .confirm                = generic_pipe_buf_confirm,
4250         .release                = buffer_pipe_buf_release,
4251         .steal                  = generic_pipe_buf_steal,
4252         .get                    = buffer_pipe_buf_get,
4253 };
4254
4255 /*
4256  * Callback from splice_to_pipe(), if we need to release some pages
4257  * at the end of the spd in case we errored out while filling the pipe.
4258  */
4259 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
4260 {
4261         struct buffer_ref *ref =
4262                 (struct buffer_ref *)spd->partial[i].private;
4263
4264         if (--ref->ref)
4265                 return;
4266
4267         ring_buffer_free_read_page(ref->buffer, ref->page);
4268         kfree(ref);
4269         spd->partial[i].private = 0;
4270 }
4271
4272 static ssize_t
4273 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4274                             struct pipe_inode_info *pipe, size_t len,
4275                             unsigned int flags)
4276 {
4277         struct ftrace_buffer_info *info = file->private_data;
4278         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4279         struct page *pages_def[PIPE_DEF_BUFFERS];
4280         struct splice_pipe_desc spd = {
4281                 .pages          = pages_def,
4282                 .partial        = partial_def,
4283                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4284                 .flags          = flags,
4285                 .ops            = &buffer_pipe_buf_ops,
4286                 .spd_release    = buffer_spd_release,
4287         };
4288         struct buffer_ref *ref;
4289         int entries, size, i;
4290         size_t ret;
4291
4292         if (splice_grow_spd(pipe, &spd))
4293                 return -ENOMEM;
4294
4295         if (*ppos & (PAGE_SIZE - 1)) {
4296                 ret = -EINVAL;
4297                 goto out;
4298         }
4299
4300         if (len & (PAGE_SIZE - 1)) {
4301                 if (len < PAGE_SIZE) {
4302                         ret = -EINVAL;
4303                         goto out;
4304                 }
4305                 len &= PAGE_MASK;
4306         }
4307
4308         trace_access_lock(info->cpu);
4309         entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
4310
4311         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
4312                 struct page *page;
4313                 int r;
4314
4315                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
4316                 if (!ref)
4317                         break;
4318
4319                 ref->ref = 1;
4320                 ref->buffer = info->tr->buffer;
4321                 ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
4322                 if (!ref->page) {
4323                         kfree(ref);
4324                         break;
4325                 }
4326
4327                 r = ring_buffer_read_page(ref->buffer, &ref->page,
4328                                           len, info->cpu, 1);
4329                 if (r < 0) {
4330                         ring_buffer_free_read_page(ref->buffer, ref->page);
4331                         kfree(ref);
4332                         break;
4333                 }
4334
4335                 /*
4336                  * Zero out any leftover data, since this page is going
4337                  * out to user land.
4338                  */
4339                 size = ring_buffer_page_len(ref->page);
4340                 if (size < PAGE_SIZE)
4341                         memset(ref->page + size, 0, PAGE_SIZE - size);
4342
4343                 page = virt_to_page(ref->page);
4344
4345                 spd.pages[i] = page;
4346                 spd.partial[i].len = PAGE_SIZE;
4347                 spd.partial[i].offset = 0;
4348                 spd.partial[i].private = (unsigned long)ref;
4349                 spd.nr_pages++;
4350                 *ppos += PAGE_SIZE;
4351
4352                 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
4353         }
4354
4355         trace_access_unlock(info->cpu);
4356         spd.nr_pages = i;
4357
4358         /* did we read anything? */
4359         if (!spd.nr_pages) {
4360                 if (flags & SPLICE_F_NONBLOCK)
4361                         ret = -EAGAIN;
4362                 else
4363                         ret = 0;
4364                 /* TODO: block */
4365                 goto out;
4366         }
4367
4368         ret = splice_to_pipe(pipe, &spd);
4369         splice_shrink_spd(&spd);
4370 out:
4371         return ret;
4372 }
4373
4374 static const struct file_operations tracing_buffers_fops = {
4375         .open           = tracing_buffers_open,
4376         .read           = tracing_buffers_read,
4377         .release        = tracing_buffers_release,
4378         .splice_read    = tracing_buffers_splice_read,
4379         .llseek         = no_llseek,
4380 };
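/*
 * These fops back the per-cpu "trace_pipe_raw" files, which expose raw,
 * page-sized ring buffer pages rather than formatted text. They are meant
 * to be consumed with page-sized read()s or, more efficiently, spliced
 * into a pipe or file; splice offsets and lengths must be page aligned,
 * as enforced above.
 */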
4381
4382 static ssize_t
4383 tracing_stats_read(struct file *filp, char __user *ubuf,
4384                    size_t count, loff_t *ppos)
4385 {
4386         unsigned long cpu = (unsigned long)filp->private_data;
4387         struct trace_array *tr = &global_trace;
4388         struct trace_seq *s;
4389         unsigned long cnt;
4390         unsigned long long t;
4391         unsigned long usec_rem;
4392
4393         s = kmalloc(sizeof(*s), GFP_KERNEL);
4394         if (!s)
4395                 return -ENOMEM;
4396
4397         trace_seq_init(s);
4398
4399         cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
4400         trace_seq_printf(s, "entries: %ld\n", cnt);
4401
4402         cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
4403         trace_seq_printf(s, "overrun: %ld\n", cnt);
4404
4405         cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
4406         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
4407
4408         cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
4409         trace_seq_printf(s, "bytes: %ld\n", cnt);
4410
4411         if (trace_clocks[trace_clock_id].in_ns) {
4412                 /* local or global for trace_clock */
4413                 t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
4414                 usec_rem = do_div(t, USEC_PER_SEC);
4415                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
4416                                                                 t, usec_rem);
4417
4418                 t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
4419                 usec_rem = do_div(t, USEC_PER_SEC);
4420                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
4421         } else {
4422                 /* counter or tsc mode for trace_clock */
4423                 trace_seq_printf(s, "oldest event ts: %llu\n",
4424                                 ring_buffer_oldest_event_ts(tr->buffer, cpu));
4425
4426                 trace_seq_printf(s, "now ts: %llu\n",
4427                                 ring_buffer_time_stamp(tr->buffer, cpu));
4428         }
4429
4430         cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
4431         trace_seq_printf(s, "dropped events: %ld\n", cnt);
4432
4433         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
4434
4435         kfree(s);
4436
4437         return count;
4438 }
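/*
 * The read handler above backs the per_cpu/cpuN/stats files. Reading one
 * produces a small report built from the ring buffer counters, along the
 * lines of (values illustrative only):
 *
 *      entries: 1024
 *      overrun: 0
 *      commit overrun: 0
 *      bytes: 45320
 *      oldest event ts:  1532.152361
 *      now ts:  1532.264219
 *      dropped events: 0
 *
 * The timestamps are printed as seconds.microseconds only when the current
 * trace clock counts in nanoseconds; otherwise the raw counter values are
 * printed.
 */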
4439
4440 static const struct file_operations tracing_stats_fops = {
4441         .open           = tracing_open_generic,
4442         .read           = tracing_stats_read,
4443         .llseek         = generic_file_llseek,
4444 };
4445
4446 #ifdef CONFIG_DYNAMIC_FTRACE
4447
4448 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
4449 {
4450         return 0;
4451 }
4452
4453 static ssize_t
4454 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
4455                   size_t cnt, loff_t *ppos)
4456 {
4457         static char ftrace_dyn_info_buffer[1024];
4458         static DEFINE_MUTEX(dyn_info_mutex);
4459         unsigned long *p = filp->private_data;
4460         char *buf = ftrace_dyn_info_buffer;
4461         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
4462         int r;
4463
4464         mutex_lock(&dyn_info_mutex);
4465         r = sprintf(buf, "%ld ", *p);
4466
4467         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
4468         buf[r++] = '\n';
4469
4470         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4471
4472         mutex_unlock(&dyn_info_mutex);
4473
4474         return r;
4475 }
4476
4477 static const struct file_operations tracing_dyn_info_fops = {
4478         .open           = tracing_open_generic,
4479         .read           = tracing_read_dyn_info,
4480         .llseek         = generic_file_llseek,
4481 };
4482 #endif
4483
4484 static struct dentry *d_tracer;
4485
4486 struct dentry *tracing_init_dentry(void)
4487 {
4488         static int once;
4489
4490         if (d_tracer)
4491                 return d_tracer;
4492
4493         if (!debugfs_initialized())
4494                 return NULL;
4495
4496         d_tracer = debugfs_create_dir("tracing", NULL);
4497
4498         if (!d_tracer && !once) {
4499                 once = 1;
4500                 pr_warning("Could not create debugfs directory 'tracing'\n");
4501                 return NULL;
4502         }
4503
4504         return d_tracer;
4505 }
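/*
 * The tracing control files are created under this "tracing" directory in
 * debugfs. If debugfs is not already mounted, something like the following
 * makes them visible (the mount point is only a convention):
 *
 *      mount -t debugfs nodev /sys/kernel/debug
 */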
4506
4507 static struct dentry *d_percpu;
4508
4509 static struct dentry *tracing_dentry_percpu(void)
4510 {
4511         static int once;
4512         struct dentry *d_tracer;
4513
4514         if (d_percpu)
4515                 return d_percpu;
4516
4517         d_tracer = tracing_init_dentry();
4518
4519         if (!d_tracer)
4520                 return NULL;
4521
4522         d_percpu = debugfs_create_dir("per_cpu", d_tracer);
4523
4524         if (!d_percpu && !once) {
4525                 once = 1;
4526                 pr_warning("Could not create debugfs directory 'per_cpu'\n");
4527                 return NULL;
4528         }
4529
4530         return d_percpu;
4531 }
4532
4533 static void tracing_init_debugfs_percpu(long cpu)
4534 {
4535         struct dentry *d_percpu = tracing_dentry_percpu();
4536         struct dentry *d_cpu;
4537         char cpu_dir[30]; /* 30 characters should be more than enough */
4538
4539         if (!d_percpu)
4540                 return;
4541
4542         snprintf(cpu_dir, 30, "cpu%ld", cpu);
4543         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4544         if (!d_cpu) {
4545                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
4546                 return;
4547         }
4548
4549         /* per cpu trace_pipe */
4550         trace_create_file("trace_pipe", 0444, d_cpu,
4551                         (void *) cpu, &tracing_pipe_fops);
4552
4553         /* per cpu trace */
4554         trace_create_file("trace", 0644, d_cpu,
4555                         (void *) cpu, &tracing_fops);
4556
4557         trace_create_file("trace_pipe_raw", 0444, d_cpu,
4558                         (void *) cpu, &tracing_buffers_fops);
4559
4560         trace_create_file("stats", 0444, d_cpu,
4561                         (void *) cpu, &tracing_stats_fops);
4562
4563         trace_create_file("buffer_size_kb", 0444, d_cpu,
4564                         (void *) cpu, &tracing_entries_fops);
4565 }
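/*
 * Each CPU gets its own per_cpu/cpuN/ directory containing the files
 * created above (trace, trace_pipe, trace_pipe_raw, stats and
 * buffer_size_kb), each scoped to that single CPU. For example (assuming
 * debugfs is mounted at /sys/kernel/debug):
 *
 *      cat /sys/kernel/debug/tracing/per_cpu/cpu0/trace
 */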
4566
4567 #ifdef CONFIG_FTRACE_SELFTEST
4568 /* Let selftest have access to static functions in this file */
4569 #include "trace_selftest.c"
4570 #endif
4571
4572 struct trace_option_dentry {
4573         struct tracer_opt               *opt;
4574         struct tracer_flags             *flags;
4575         struct dentry                   *entry;
4576 };
4577
4578 static ssize_t
4579 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
4580                         loff_t *ppos)
4581 {
4582         struct trace_option_dentry *topt = filp->private_data;
4583         char *buf;
4584
4585         if (topt->flags->val & topt->opt->bit)
4586                 buf = "1\n";
4587         else
4588                 buf = "0\n";
4589
4590         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
4591 }
4592
4593 static ssize_t
4594 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
4595                          loff_t *ppos)
4596 {
4597         struct trace_option_dentry *topt = filp->private_data;
4598         unsigned long val;
4599         int ret;
4600
4601         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4602         if (ret)
4603                 return ret;
4604
4605         if (val != 0 && val != 1)
4606                 return -EINVAL;
4607
4608         if (!!(topt->flags->val & topt->opt->bit) != val) {
4609                 mutex_lock(&trace_types_lock);
4610                 ret = __set_tracer_option(current_trace, topt->flags,
4611                                           topt->opt, !val);
4612                 mutex_unlock(&trace_types_lock);
4613                 if (ret)
4614                         return ret;
4615         }
4616
4617         *ppos += cnt;
4618
4619         return cnt;
4620 }
4621
4622
4623 static const struct file_operations trace_options_fops = {
4624         .open = tracing_open_generic,
4625         .read = trace_options_read,
4626         .write = trace_options_write,
4627         .llseek = generic_file_llseek,
4628 };
4629
4630 static ssize_t
4631 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
4632                         loff_t *ppos)
4633 {
4634         long index = (long)filp->private_data;
4635         char *buf;
4636
4637         if (trace_flags & (1 << index))
4638                 buf = "1\n";
4639         else
4640                 buf = "0\n";
4641
4642         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
4643 }
4644
4645 static ssize_t
4646 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4647                          loff_t *ppos)
4648 {
4649         long index = (long)filp->private_data;
4650         unsigned long val;
4651         int ret;
4652
4653         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4654         if (ret)
4655                 return ret;
4656
4657         if (val != 0 && val != 1)
4658                 return -EINVAL;
4659         set_tracer_flags(1 << index, val);
4660
4661         *ppos += cnt;
4662
4663         return cnt;
4664 }
4665
4666 static const struct file_operations trace_options_core_fops = {
4667         .open = tracing_open_generic,
4668         .read = trace_options_core_read,
4669         .write = trace_options_core_write,
4670         .llseek = generic_file_llseek,
4671 };
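/*
 * These fops back the files in the "options" directory, one per trace flag.
 * Each file reads back "0" or "1" and accepts the same values to clear or
 * set the corresponding bit in trace_flags. Illustrative usage (assuming
 * debugfs is mounted at /sys/kernel/debug; <flag> stands for any option
 * file listed in that directory):
 *
 *      echo 1 > /sys/kernel/debug/tracing/options/<flag>
 */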
4672
4673 struct dentry *trace_create_file(const char *name,
4674                                  umode_t mode,
4675                                  struct dentry *parent,
4676                                  void *data,
4677                                  const struct file_operations *fops)
4678 {
4679         struct dentry *ret;
4680
4681         ret = debugfs_create_file(name, mode, parent, data, fops);
4682         if (!ret)
4683                 pr_warning("Could not create debugfs '%s' entry\n", name);
4684
4685         return ret;
4686 }
4687
4688
4689 static struct dentry *trace_options_init_dentry(void)
4690 {
4691         struct dentry *d_tracer;
4692         static struct dentry *t_options;
4693
4694         if (t_options)
4695                 return t_options;
4696
4697         d_tracer = tracing_init_dentry();
4698         if (!d_tracer)
4699                 return NULL;
4700
4701         t_options = debugfs_create_dir("options", d_tracer);
4702         if (!t_options) {
4703                 pr_warning("Could not create debugfs directory 'options'\n");
4704                 return NULL;
4705         }
4706
4707         return t_options;
4708 }
4709
4710 static void
4711 create_trace_option_file(struct trace_option_dentry *topt,
4712                          struct tracer_flags *flags,
4713                          struct tracer_opt *opt)
4714 {
4715         struct dentry *t_options;
4716
4717         t_options = trace_options_init_dentry();
4718         if (!t_options)
4719                 return;
4720
4721         topt->flags = flags;
4722         topt->opt = opt;
4723
4724         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
4725                                     &trace_options_fops);
4726
4727 }
4728
4729 static struct trace_option_dentry *
4730 create_trace_option_files(struct tracer *tracer)
4731 {
4732         struct trace_option_dentry *topts;
4733         struct tracer_flags *flags;
4734         struct tracer_opt *opts;
4735         int cnt;
4736
4737         if (!tracer)
4738                 return NULL;
4739
4740         flags = tracer->flags;
4741
4742         if (!flags || !flags->opts)
4743                 return NULL;
4744
4745         opts = flags->opts;
4746
4747         for (cnt = 0; opts[cnt].name; cnt++)
4748                 ;
4749
4750         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
4751         if (!topts)
4752                 return NULL;
4753
4754         for (cnt = 0; opts[cnt].name; cnt++)
4755                 create_trace_option_file(&topts[cnt], flags,
4756                                          &opts[cnt]);
4757
4758         return topts;
4759 }
4760
4761 static void
4762 destroy_trace_option_files(struct trace_option_dentry *topts)
4763 {
4764         int cnt;
4765
4766         if (!topts)
4767                 return;
4768
4769         for (cnt = 0; topts[cnt].opt; cnt++) {
4770                 if (topts[cnt].entry)
4771                         debugfs_remove(topts[cnt].entry);
4772         }
4773
4774         kfree(topts);
4775 }
4776
4777 static struct dentry *
4778 create_trace_option_core_file(const char *option, long index)
4779 {
4780         struct dentry *t_options;
4781
4782         t_options = trace_options_init_dentry();
4783         if (!t_options)
4784                 return NULL;
4785
4786         return trace_create_file(option, 0644, t_options, (void *)index,
4787                                     &trace_options_core_fops);
4788 }
4789
4790 static __init void create_trace_options_dir(void)
4791 {
4792         struct dentry *t_options;
4793         int i;
4794
4795         t_options = trace_options_init_dentry();
4796         if (!t_options)
4797                 return;
4798
4799         for (i = 0; trace_options[i]; i++)
4800                 create_trace_option_core_file(trace_options[i], i);
4801 }
4802
4803 static ssize_t
4804 rb_simple_read(struct file *filp, char __user *ubuf,
4805                size_t cnt, loff_t *ppos)
4806 {
4807         struct trace_array *tr = filp->private_data;
4808         struct ring_buffer *buffer = tr->buffer;
4809         char buf[64];
4810         int r;
4811
4812         if (buffer)
4813                 r = ring_buffer_record_is_on(buffer);
4814         else
4815                 r = 0;
4816
4817         r = sprintf(buf, "%d\n", r);
4818
4819         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4820 }
4821
4822 static ssize_t
4823 rb_simple_write(struct file *filp, const char __user *ubuf,
4824                 size_t cnt, loff_t *ppos)
4825 {
4826         struct trace_array *tr = filp->private_data;
4827         struct ring_buffer *buffer = tr->buffer;
4828         unsigned long val;
4829         int ret;
4830
4831         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4832         if (ret)
4833                 return ret;
4834
4835         if (buffer) {
4836                 mutex_lock(&trace_types_lock);
4837                 if (val) {
4838                         ring_buffer_record_on(buffer);
4839                         if (current_trace->start)
4840                                 current_trace->start(tr);
4841                 } else {
4842                         ring_buffer_record_off(buffer);
4843                         if (current_trace->stop)
4844                                 current_trace->stop(tr);
4845                 }
4846                 mutex_unlock(&trace_types_lock);
4847         }
4848
4849         (*ppos)++;
4850
4851         return cnt;
4852 }
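/*
 * The handlers above back the "tracing_on" file, a lightweight switch for
 * recording into the ring buffer: writing 0 stops recording (and calls the
 * current tracer's stop hook), writing 1 starts it again, and a read
 * reports the current state. Illustrative usage (assuming debugfs is
 * mounted at /sys/kernel/debug):
 *
 *      echo 0 > /sys/kernel/debug/tracing/tracing_on
 *      echo 1 > /sys/kernel/debug/tracing/tracing_on
 *      cat /sys/kernel/debug/tracing/tracing_on
 */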
4853
4854 static const struct file_operations rb_simple_fops = {
4855         .open           = tracing_open_generic,
4856         .read           = rb_simple_read,
4857         .write          = rb_simple_write,
4858         .llseek         = default_llseek,
4859 };
4860
4861 static __init int tracer_init_debugfs(void)
4862 {
4863         struct dentry *d_tracer;
4864         int cpu;
4865
4866         trace_access_lock_init();
4867
4868         d_tracer = tracing_init_dentry();
4869
4870         trace_create_file("trace_options", 0644, d_tracer,
4871                         NULL, &tracing_iter_fops);
4872
4873         trace_create_file("tracing_cpumask", 0644, d_tracer,
4874                         NULL, &tracing_cpumask_fops);
4875
4876         trace_create_file("trace", 0644, d_tracer,
4877                         (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
4878
4879         trace_create_file("available_tracers", 0444, d_tracer,
4880                         &global_trace, &show_traces_fops);
4881
4882         trace_create_file("current_tracer", 0644, d_tracer,
4883                         &global_trace, &set_tracer_fops);
4884
4885 #ifdef CONFIG_TRACER_MAX_TRACE
4886         trace_create_file("tracing_max_latency", 0644, d_tracer,
4887                         &tracing_max_latency, &tracing_max_lat_fops);
4888 #endif
4889
4890         trace_create_file("tracing_thresh", 0644, d_tracer,
4891                         &tracing_thresh, &tracing_max_lat_fops);
4892
4893         trace_create_file("README", 0444, d_tracer,
4894                         NULL, &tracing_readme_fops);
4895
4896         trace_create_file("trace_pipe", 0444, d_tracer,
4897                         (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4898
4899         trace_create_file("buffer_size_kb", 0644, d_tracer,
4900                         (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops);
4901
4902         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
4903                         &global_trace, &tracing_total_entries_fops);
4904
4905         trace_create_file("free_buffer", 0644, d_tracer,
4906                         &global_trace, &tracing_free_buffer_fops);
4907
4908         trace_create_file("trace_marker", 0220, d_tracer,
4909                         NULL, &tracing_mark_fops);
4910
4911         trace_create_file("saved_cmdlines", 0444, d_tracer,
4912                         NULL, &tracing_saved_cmdlines_fops);
4913
4914         trace_create_file("trace_clock", 0644, d_tracer, NULL,
4915                           &trace_clock_fops);
4916
4917         trace_create_file("tracing_on", 0644, d_tracer,
4918                             &global_trace, &rb_simple_fops);
4919
4920 #ifdef CONFIG_DYNAMIC_FTRACE
4921         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4922                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4923 #endif
4924
4925         create_trace_options_dir();
4926
4927         for_each_tracing_cpu(cpu)
4928                 tracing_init_debugfs_percpu(cpu);
4929
4930         return 0;
4931 }
4932
4933 static int trace_panic_handler(struct notifier_block *this,
4934                                unsigned long event, void *unused)
4935 {
4936         if (ftrace_dump_on_oops)
4937                 ftrace_dump(ftrace_dump_on_oops);
4938         return NOTIFY_OK;
4939 }
4940
4941 static struct notifier_block trace_panic_notifier = {
4942         .notifier_call  = trace_panic_handler,
4943         .next           = NULL,
4944         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
4945 };
4946
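/* Die notifier: dump the ftrace ring buffers on an oops as well */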
4947 static int trace_die_handler(struct notifier_block *self,
4948                              unsigned long val,
4949                              void *data)
4950 {
4951         switch (val) {
4952         case DIE_OOPS:
4953                 if (ftrace_dump_on_oops)
4954                         ftrace_dump(ftrace_dump_on_oops);
4955                 break;
4956         default:
4957                 break;
4958         }
4959         return NOTIFY_OK;
4960 }
4961
4962 static struct notifier_block trace_die_notifier = {
4963         .notifier_call = trace_die_handler,
4964         .priority = 200
4965 };
4966
4967 /*
4968  * printk is limited to 1024 characters; we don't need it that big.
4969  * Nothing should be printing 1000 characters anyway.
4970  */
4971 #define TRACE_MAX_PRINT         1000
4972
4973 /*
4974  * Define here KERN_TRACE so that we have one place to modify
4975  * it if we decide to change what log level the ftrace dump
4976  * should be at.
4977  */
4978 #define KERN_TRACE              KERN_EMERG
4979
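/*
 * Print the contents of a trace_seq to the console at KERN_TRACE
 * level, then reset it so it can be reused.
 */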
4980 void
4981 trace_printk_seq(struct trace_seq *s)
4982 {
4983         /* Probably should print a warning here. */
4984         if (s->len >= TRACE_MAX_PRINT)
4985                 s->len = TRACE_MAX_PRINT;
4986
4987         /* should be NUL-terminated already, but be paranoid */
4988         s->buffer[s->len] = 0;
4989
4990         printk(KERN_TRACE "%s", s->buffer);
4991
4992         trace_seq_init(s);
4993 }
4994
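/* Initialize an iterator that walks the global trace buffer across all CPUs */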
4995 void trace_init_global_iter(struct trace_iterator *iter)
4996 {
4997         iter->tr = &global_trace;
4998         iter->trace = current_trace;
4999         iter->cpu_file = TRACE_PIPE_ALL_CPU;
5000 }
5001
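/*
 * Dump the ring buffers to the console.  Only the first caller ever
 * performs the dump (guarded by dump_ran); tracing is turned off while
 * dumping and is only re-enabled when disable_tracing is false.
 */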
5002 static void
5003 __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
5004 {
5005         static arch_spinlock_t ftrace_dump_lock =
5006                 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
5007         /* use static because iter can be a bit big for the stack */
5008         static struct trace_iterator iter;
5009         unsigned int old_userobj;
5010         static int dump_ran;
5011         unsigned long flags;
5012         int cnt = 0, cpu;
5013
5014         /* only one dump */
5015         local_irq_save(flags);
5016         arch_spin_lock(&ftrace_dump_lock);
5017         if (dump_ran)
5018                 goto out;
5019
5020         dump_ran = 1;
5021
5022         tracing_off();
5023
5024         /* Did function tracer already get disabled? */
5025         if (ftrace_is_dead()) {
5026                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
5027                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
5028         }
5029
5030         if (disable_tracing)
5031                 ftrace_kill();
5032
5033         trace_init_global_iter(&iter);
5034
5035         for_each_tracing_cpu(cpu) {
5036                 atomic_inc(&iter.tr->data[cpu]->disabled);
5037         }
5038
5039         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
5040
5041         /* don't look at user memory in panic mode */
5042         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
5043
5044         /* Simulate the iterator */
5045         iter.tr = &global_trace;
5046         iter.trace = current_trace;
5047
5048         switch (oops_dump_mode) {
5049         case DUMP_ALL:
5050                 iter.cpu_file = TRACE_PIPE_ALL_CPU;
5051                 break;
5052         case DUMP_ORIG:
5053                 iter.cpu_file = raw_smp_processor_id();
5054                 break;
5055         case DUMP_NONE:
5056                 goto out_enable;
5057         default:
5058                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
5059                 iter.cpu_file = TRACE_PIPE_ALL_CPU;
5060         }
5061
5062         printk(KERN_TRACE "Dumping ftrace buffer:\n");
5063
5064          /*
5065          * We need to stop all tracing on all CPUs to read the
5066          * next buffer. This is a bit expensive, but is
5067          * not done often. We print everything we can read,
5068          * and then release the locks again.
5069          */
5070
5071         while (!trace_empty(&iter)) {
5072
5073                 if (!cnt)
5074                         printk(KERN_TRACE "---------------------------------\n");
5075
5076                 cnt++;
5077
5078                 /* reset all but tr, trace, and overruns */
5079                 memset(&iter.seq, 0,
5080                        sizeof(struct trace_iterator) -
5081                        offsetof(struct trace_iterator, seq));
5082                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
5083                 iter.pos = -1;
5084
5085                 if (trace_find_next_entry_inc(&iter) != NULL) {
5086                         int ret;
5087
5088                         ret = print_trace_line(&iter);
5089                         if (ret != TRACE_TYPE_NO_CONSUME)
5090                                 trace_consume(&iter);
5091                 }
5092                 touch_nmi_watchdog();
5093
5094                 trace_printk_seq(&iter.seq);
5095         }
5096
5097         if (!cnt)
5098                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
5099         else
5100                 printk(KERN_TRACE "---------------------------------\n");
5101
5102  out_enable:
5103         /* Re-enable tracing if requested */
5104         if (!disable_tracing) {
5105                 trace_flags |= old_userobj;
5106
5107                 for_each_tracing_cpu(cpu) {
5108                         atomic_dec(&iter.tr->data[cpu]->disabled);
5109                 }
5110                 tracing_on();
5111         }
5112
5113  out:
5114         arch_spin_unlock(&ftrace_dump_lock);
5115         local_irq_restore(flags);
5116 }
5117
5118 /* By default: disable tracing after the dump */
5119 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
5120 {
5121         __ftrace_dump(true, oops_dump_mode);
5122 }
5123 EXPORT_SYMBOL_GPL(ftrace_dump);
5124
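/*
 * Early-init setup: allocate the ring buffers and per-CPU trace data,
 * register the nop tracer and the panic/die notifiers, and apply any
 * boot-time trace options.
 */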
5125 __init static int tracer_alloc_buffers(void)
5126 {
5127         int ring_buf_size;
5128         enum ring_buffer_flags rb_flags;
5129         int i;
5130         int ret = -ENOMEM;
5131
5132
5133         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
5134                 goto out;
5135
5136         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
5137                 goto out_free_buffer_mask;
5138
5139         /* Only allocate trace_printk buffers if a trace_printk exists */
5140         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
5141                 /* Must be called before global_trace.buffer is allocated */
5142                 trace_printk_init_buffers();
5143
5144         /* To save memory, keep the ring buffer size to its minimum */
5145         if (ring_buffer_expanded)
5146                 ring_buf_size = trace_buf_size;
5147         else
5148                 ring_buf_size = 1;
5149
5150         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5151
5152         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
5153         cpumask_copy(tracing_cpumask, cpu_all_mask);
5154
5155         /* TODO: make the number of buffers hot pluggable with CPUs */
5156         global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
5157         if (!global_trace.buffer) {
5158                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
5159                 WARN_ON(1);
5160                 goto out_free_cpumask;
5161         }
5162         if (global_trace.buffer_disabled)
5163                 tracing_off();
5164
5165
5166 #ifdef CONFIG_TRACER_MAX_TRACE
5167         max_tr.buffer = ring_buffer_alloc(1, rb_flags);
5168         if (!max_tr.buffer) {
5169                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
5170                 WARN_ON(1);
5171                 ring_buffer_free(global_trace.buffer);
5172                 goto out_free_cpumask;
5173         }
5174 #endif
5175
5176         /* Point each trace array entry at its per-CPU data */
5177         for_each_tracing_cpu(i) {
5178                 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
5179                 max_tr.data[i] = &per_cpu(max_tr_data, i);
5180         }
5181
5182         set_buffer_entries(&global_trace,
5183                            ring_buffer_size(global_trace.buffer, 0));
5184 #ifdef CONFIG_TRACER_MAX_TRACE
5185         set_buffer_entries(&max_tr, 1);
5186 #endif
5187
5188         trace_init_cmdlines();
5189         init_irq_work(&trace_work_wakeup, trace_wake_up);
5190
5191         register_tracer(&nop_trace);
5192         current_trace = &nop_trace;
5193         /* All seems OK, enable tracing */
5194         tracing_disabled = 0;
5195
5196         atomic_notifier_chain_register(&panic_notifier_list,
5197                                        &trace_panic_notifier);
5198
5199         register_die_notifier(&trace_die_notifier);
5200
5201         while (trace_boot_options) {
5202                 char *option;
5203
5204                 option = strsep(&trace_boot_options, ",");
5205                 trace_set_options(option);
5206         }
5207
5208         return 0;
5209
5210 out_free_cpumask:
5211         free_cpumask_var(tracing_cpumask);
5212 out_free_buffer_mask:
5213         free_cpumask_var(tracing_buffer_mask);
5214 out:
5215         return ret;
5216 }
5217
5218 __init static int clear_boot_tracer(void)
5219 {
5220         /*
5221          * The name of the default boot tracer lives in an init
5222          * section that is freed after boot. This is called at
5223          * late_initcall time: if the boot tracer was never found,
5224          * clear the pointer now, to prevent a later registration
5225          * from accessing memory that is about to be freed.
5226          */
5227         if (!default_bootup_tracer)
5228                 return 0;
5229
5230         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
5231                default_bootup_tracer);
5232         default_bootup_tracer = NULL;
5233
5234         return 0;
5235 }
5236
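/*
 * Initcall ordering: the buffers are allocated early, the debugfs
 * files are created at fs_initcall time, and any stale boot tracer
 * pointer is cleared late.
 */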
5237 early_initcall(tracer_alloc_buffers);
5238 fs_initcall(tracer_init_debugfs);
5239 late_initcall(clear_boot_tracer);