kernel/trace/trace.c (~andy/linux)
tracing: Fix traceon trigger condition to actually turn tracing on
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurs.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but is set to zero if the initialization
91  * of the tracer is successful; that is the only place that sets
92  * it back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
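/*
 * Illustrative sketch: given the parser above and the sysctl mentioned in
 * the comment further up, this could be enabled with, for example:
 *
 *   ftrace_dump_on_oops              (command line, dump all CPUs)
 *   ftrace_dump_on_oops=orig_cpu     (dump only the CPU that oopsed)
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    (at run time)
 */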
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
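/*
 * Illustrative sketch: combining the boot parameters registered above, a
 * kernel command line could contain, for example:
 *
 *   ftrace=function trace_options=sym-addr,stacktrace \
 *   alloc_snapshot traceoff_on_warning
 *
 * Here "function" is assumed to be the name of a registered tracer, and
 * the option names are taken from the trace_options[] table below.
 */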
182
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
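/*
 * Worked example: ns2usecs() rounds to the nearest microsecond, so
 * 1499 ns -> (1499 + 500) / 1000 = 1 us, while 1500 ns -> 2 us.
 */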
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptors of the pages in memory hold this linked list
196  * by chaining their lru items to the other pages of that
197  * CPU's buffer.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show whether global_trace is enabled
287  *
288  * Shows whether the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled", which is meant for fast paths such as
290  * the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on() which is a little
292  * slower, but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to a low number of 16384.
311  * If a dump on oops happens, it is much appreciated not to
312  * have to wait for all that output. In any case, this can be
313  * configured at both boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a linked list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * serialize the access of the ring buffer
329  *
330  * The ring buffer serializes readers, but that is only low level protection.
331  * The validity of the events (returned by ring_buffer_peek(), etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow other processes to
335  * consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be rewritten
338  *      by the event producer.
339  *   B) the page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-CPU ring
343  * buffers concurrently.
344  *
345  * These primitives don't distinguish read-only from read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
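/*
 * Illustrative sketch of how these primitives are meant to be used by a
 * reader of a single CPU buffer:
 *
 *   trace_access_lock(cpu);
 *   ... peek at or consume events from @cpu's buffer ...
 *   trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock exclusively,
 * for operations that touch every CPU buffer at once.
 */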
408
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff) that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races where it gets disabled while we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457
458         if (unlikely(tracing_selftest_running || tracing_disabled))
459                 return 0;
460
461         alloc = sizeof(*entry) + size + 2; /* possible \n added */
462
463         local_save_flags(irq_flags);
464         buffer = global_trace.trace_buffer.buffer;
465         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
466                                           irq_flags, preempt_count());
467         if (!event)
468                 return 0;
469
470         entry = ring_buffer_event_data(event);
471         entry->ip = ip;
472
473         memcpy(&entry->buf, str, size);
474
475         /* Add a newline if necessary */
476         if (entry->buf[size - 1] != '\n') {
477                 entry->buf[size] = '\n';
478                 entry->buf[size + 1] = '\0';
479         } else
480                 entry->buf[size] = '\0';
481
482         __buffer_unlock_commit(buffer, event);
483
484         return size;
485 }
486 EXPORT_SYMBOL_GPL(__trace_puts);
487
488 /**
489  * __trace_bputs - write the pointer to a constant string into trace buffer
490  * @ip:    The address of the caller
491  * @str:   The constant string whose address is written to the buffer
492  */
493 int __trace_bputs(unsigned long ip, const char *str)
494 {
495         struct ring_buffer_event *event;
496         struct ring_buffer *buffer;
497         struct bputs_entry *entry;
498         unsigned long irq_flags;
499         int size = sizeof(struct bputs_entry);
500
501         if (unlikely(tracing_selftest_running || tracing_disabled))
502                 return 0;
503
504         local_save_flags(irq_flags);
505         buffer = global_trace.trace_buffer.buffer;
506         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
507                                           irq_flags, preempt_count());
508         if (!event)
509                 return 0;
510
511         entry = ring_buffer_event_data(event);
512         entry->ip                       = ip;
513         entry->str                      = str;
514
515         __buffer_unlock_commit(buffer, event);
516
517         return 1;
518 }
519 EXPORT_SYMBOL_GPL(__trace_bputs);
520
521 #ifdef CONFIG_TRACER_SNAPSHOT
522 /**
523  * tracing_snapshot - take a snapshot of the current buffer.
524  *
525  * This causes a swap between the snapshot buffer and the current live
526  * tracing buffer. You can use this to take snapshots of the live
527  * trace when some condition is triggered, but continue to trace.
528  *
529  * Note, make sure to allocate the snapshot either with
530  * tracing_snapshot_alloc(), or manually with:
531  *   echo 1 > /sys/kernel/debug/tracing/snapshot
532  *
533  * If the snapshot buffer is not allocated, this will stop tracing,
534  * basically making a permanent snapshot.
535  */
536 void tracing_snapshot(void)
537 {
538         struct trace_array *tr = &global_trace;
539         struct tracer *tracer = tr->current_trace;
540         unsigned long flags;
541
542         if (in_nmi()) {
543                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
544                 internal_trace_puts("*** snapshot is being ignored        ***\n");
545                 return;
546         }
547
548         if (!tr->allocated_snapshot) {
549                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
550                 internal_trace_puts("*** stopping trace here!   ***\n");
551                 tracing_off();
552                 return;
553         }
554
555         /* Note, snapshot can not be used when the tracer uses it */
556         if (tracer->use_max_tr) {
557                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
558                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
559                 return;
560         }
561
562         local_irq_save(flags);
563         update_max_tr(tr, current, smp_processor_id());
564         local_irq_restore(flags);
565 }
566 EXPORT_SYMBOL_GPL(tracing_snapshot);
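/*
 * Illustrative sketch: a hypothetical caller could freeze the interesting
 * trace data when it detects a condition, assuming the snapshot buffer
 * was allocated beforehand:
 *
 *   if (something_went_wrong())
 *           tracing_snapshot();
 *
 * something_went_wrong() is a made-up placeholder; tracing continues
 * afterwards, and the old contents stay in the snapshot buffer until the
 * next swap.
 */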
567
568 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
569                                         struct trace_buffer *size_buf, int cpu_id);
570 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
571
572 static int alloc_snapshot(struct trace_array *tr)
573 {
574         int ret;
575
576         if (!tr->allocated_snapshot) {
577
578                 /* allocate spare buffer */
579                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
580                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
581                 if (ret < 0)
582                         return ret;
583
584                 tr->allocated_snapshot = true;
585         }
586
587         return 0;
588 }
589
590 void free_snapshot(struct trace_array *tr)
591 {
592         /*
593          * We don't free the ring buffer; instead, we resize it because
594          * the max_tr ring buffer has some state (e.g. ring->clock) and
595          * we want to preserve it.
596          */
597         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
598         set_buffer_entries(&tr->max_buffer, 1);
599         tracing_reset_online_cpus(&tr->max_buffer);
600         tr->allocated_snapshot = false;
601 }
602
603 /**
604  * tracing_alloc_snapshot - allocate snapshot buffer.
605  *
606  * This only allocates the snapshot buffer if it isn't already
607  * allocated - it doesn't also take a snapshot.
608  *
609  * This is meant to be used in cases where the snapshot buffer needs
610  * to be set up for events that can't sleep but need to be able to
611  * trigger a snapshot.
612  */
613 int tracing_alloc_snapshot(void)
614 {
615         struct trace_array *tr = &global_trace;
616         int ret;
617
618         ret = alloc_snapshot(tr);
619         WARN_ON(ret < 0);
620
621         return ret;
622 }
623 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
624
625 /**
626  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
627  *
628  * This is similar to tracing_snapshot(), but it will allocate the
629  * snapshot buffer if it isn't already allocated. Use this only
630  * where it is safe to sleep, as the allocation may sleep.
631  *
632  * This causes a swap between the snapshot buffer and the current live
633  * tracing buffer. You can use this to take snapshots of the live
634  * trace when some condition is triggered, but continue to trace.
635  */
636 void tracing_snapshot_alloc(void)
637 {
638         int ret;
639
640         ret = tracing_alloc_snapshot();
641         if (ret < 0)
642                 return;
643
644         tracing_snapshot();
645 }
646 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
647 #else
648 void tracing_snapshot(void)
649 {
650         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
651 }
652 EXPORT_SYMBOL_GPL(tracing_snapshot);
653 int tracing_alloc_snapshot(void)
654 {
655         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
656         return -ENODEV;
657 }
658 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
659 void tracing_snapshot_alloc(void)
660 {
661         /* Give warning */
662         tracing_snapshot();
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
665 #endif /* CONFIG_TRACER_SNAPSHOT */
666
667 static void tracer_tracing_off(struct trace_array *tr)
668 {
669         if (tr->trace_buffer.buffer)
670                 ring_buffer_record_off(tr->trace_buffer.buffer);
671         /*
672          * This flag is looked at when buffers haven't been allocated
673          * yet, or by some tracers (like irqsoff) that just want to
674          * know if the ring buffer has been disabled, but it can handle
675          * races where it gets disabled while we still do a record.
676          * As the check is in the fast path of the tracers, it is more
677          * important to be fast than accurate.
678          */
679         tr->buffer_disabled = 1;
680         /* Make the flag seen by readers */
681         smp_wmb();
682 }
683
684 /**
685  * tracing_off - turn off tracing buffers
686  *
687  * This function stops the tracing buffers from recording data.
688  * It does not disable any overhead the tracers themselves may
689  * be causing. This function simply causes all recording to
690  * the ring buffers to fail.
691  */
692 void tracing_off(void)
693 {
694         tracer_tracing_off(&global_trace);
695 }
696 EXPORT_SYMBOL_GPL(tracing_off);
697
698 void disable_trace_on_warning(void)
699 {
700         if (__disable_trace_on_warning)
701                 tracing_off();
702 }
703
704 /**
705  * tracer_tracing_is_on - show the real state of the ring buffer
706  * @tr: the trace array whose ring buffer state is queried
707  *
708  * Shows the real state of the ring buffer: whether it is enabled or not.
709  */
710 static int tracer_tracing_is_on(struct trace_array *tr)
711 {
712         if (tr->trace_buffer.buffer)
713                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
714         return !tr->buffer_disabled;
715 }
716
717 /**
718  * tracing_is_on - show state of ring buffers enabled
719  */
720 int tracing_is_on(void)
721 {
722         return tracer_tracing_is_on(&global_trace);
723 }
724 EXPORT_SYMBOL_GPL(tracing_is_on);
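/*
 * Illustrative sketch: tracing_on(), tracing_off() and tracing_is_on()
 * are exported, so other kernel code could, for instance, stop recording
 * as soon as a suspected bug is detected:
 *
 *   if (tracing_is_on())
 *           tracing_off();
 *
 * This only stops writes into the ring buffers; it does not remove the
 * overhead of any active tracers (see the tracing_off() comment above).
 */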
725
726 static int __init set_buf_size(char *str)
727 {
728         unsigned long buf_size;
729
730         if (!str)
731                 return 0;
732         buf_size = memparse(str, &str);
733         /* nr_entries can not be zero */
734         if (buf_size == 0)
735                 return 0;
736         trace_buf_size = buf_size;
737         return 1;
738 }
739 __setup("trace_buf_size=", set_buf_size);
740
741 static int __init set_tracing_thresh(char *str)
742 {
743         unsigned long threshold;
744         int ret;
745
746         if (!str)
747                 return 0;
748         ret = kstrtoul(str, 0, &threshold);
749         if (ret < 0)
750                 return 0;
751         tracing_thresh = threshold * 1000;
752         return 1;
753 }
754 __setup("tracing_thresh=", set_tracing_thresh);
755
756 unsigned long nsecs_to_usecs(unsigned long nsecs)
757 {
758         return nsecs / 1000;
759 }
760
761 /* These must match the bit positions in trace_iterator_flags */
762 static const char *trace_options[] = {
763         "print-parent",
764         "sym-offset",
765         "sym-addr",
766         "verbose",
767         "raw",
768         "hex",
769         "bin",
770         "block",
771         "stacktrace",
772         "trace_printk",
773         "ftrace_preempt",
774         "branch",
775         "annotate",
776         "userstacktrace",
777         "sym-userobj",
778         "printk-msg-only",
779         "context-info",
780         "latency-format",
781         "sleep-time",
782         "graph-time",
783         "record-cmd",
784         "overwrite",
785         "disable_on_free",
786         "irq-info",
787         "markers",
788         "function-trace",
789         NULL
790 };
791
792 static struct {
793         u64 (*func)(void);
794         const char *name;
795         int in_ns;              /* is this clock in nanoseconds? */
796 } trace_clocks[] = {
797         { trace_clock_local,    "local",        1 },
798         { trace_clock_global,   "global",       1 },
799         { trace_clock_counter,  "counter",      0 },
800         { trace_clock_jiffies,  "uptime",       1 },
801         { trace_clock,          "perf",         1 },
802         ARCH_TRACE_CLOCKS
803 };
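/*
 * Illustrative note: the clock in use is normally selected through the
 * trace_clock file in the tracing directory, e.g.:
 *
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * using one of the names listed in trace_clocks[] above.
 */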
804
805 /*
806  * trace_parser_get_init - gets the buffer for trace parser
807  */
808 int trace_parser_get_init(struct trace_parser *parser, int size)
809 {
810         memset(parser, 0, sizeof(*parser));
811
812         parser->buffer = kmalloc(size, GFP_KERNEL);
813         if (!parser->buffer)
814                 return 1;
815
816         parser->size = size;
817         return 0;
818 }
819
820 /*
821  * trace_parser_put - frees the buffer for trace parser
822  */
823 void trace_parser_put(struct trace_parser *parser)
824 {
825         kfree(parser->buffer);
826 }
827
828 /*
829  * trace_get_user - reads the user input string separated by space
830  * (matched by isspace(ch))
831  *
832  * For each string found the 'struct trace_parser' is updated,
833  * and the function returns.
834  *
835  * Returns number of bytes read.
836  *
837  * See kernel/trace/trace.h for 'struct trace_parser' details.
838  */
839 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
840         size_t cnt, loff_t *ppos)
841 {
842         char ch;
843         size_t read = 0;
844         ssize_t ret;
845
846         if (!*ppos)
847                 trace_parser_clear(parser);
848
849         ret = get_user(ch, ubuf++);
850         if (ret)
851                 goto out;
852
853         read++;
854         cnt--;
855
856         /*
857          * If the parser is not finished with the last write,
858          * continue reading the user input without skipping spaces.
859          */
860         if (!parser->cont) {
861                 /* skip white space */
862                 while (cnt && isspace(ch)) {
863                         ret = get_user(ch, ubuf++);
864                         if (ret)
865                                 goto out;
866                         read++;
867                         cnt--;
868                 }
869
870                 /* only spaces were written */
871                 if (isspace(ch)) {
872                         *ppos += read;
873                         ret = read;
874                         goto out;
875                 }
876
877                 parser->idx = 0;
878         }
879
880         /* read the non-space input */
881         while (cnt && !isspace(ch)) {
882                 if (parser->idx < parser->size - 1)
883                         parser->buffer[parser->idx++] = ch;
884                 else {
885                         ret = -EINVAL;
886                         goto out;
887                 }
888                 ret = get_user(ch, ubuf++);
889                 if (ret)
890                         goto out;
891                 read++;
892                 cnt--;
893         }
894
895         /* We either got finished input or we have to wait for another call. */
896         if (isspace(ch)) {
897                 parser->buffer[parser->idx] = 0;
898                 parser->cont = false;
899         } else if (parser->idx < parser->size - 1) {
900                 parser->cont = true;
901                 parser->buffer[parser->idx++] = ch;
902         } else {
903                 ret = -EINVAL;
904                 goto out;
905         }
906
907         *ppos += read;
908         ret = read;
909
910 out:
911         return ret;
912 }
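/*
 * Illustrative sketch, with made-up names, of how a write handler might
 * drive the parser above:
 *
 *   struct trace_parser parser;
 *
 *   if (trace_parser_get_init(&parser, EXAMPLE_SIZE))
 *           return -ENOMEM;
 *   read = trace_get_user(&parser, ubuf, cnt, ppos);
 *   if (read >= 0 && trace_parser_loaded(&parser))
 *           ... act on parser.buffer ...
 *   trace_parser_put(&parser);
 *
 * EXAMPLE_SIZE is a placeholder buffer size; trace_parser_loaded() is
 * declared in trace.h.
 */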
913
914 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
915 {
916         int len;
917         int ret;
918
919         if (!cnt)
920                 return 0;
921
922         if (s->len <= s->readpos)
923                 return -EBUSY;
924
925         len = s->len - s->readpos;
926         if (cnt > len)
927                 cnt = len;
928         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
929         if (ret == cnt)
930                 return -EFAULT;
931
932         cnt -= ret;
933
934         s->readpos += cnt;
935         return cnt;
936 }
937
938 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
939 {
940         int len;
941
942         if (s->len <= s->readpos)
943                 return -EBUSY;
944
945         len = s->len - s->readpos;
946         if (cnt > len)
947                 cnt = len;
948         memcpy(buf, s->buffer + s->readpos, cnt);
949
950         s->readpos += cnt;
951         return cnt;
952 }
953
954 /*
955  * ftrace_max_lock is used to protect the swapping of buffers
956  * when taking a max snapshot. The buffers themselves are
957  * protected by per_cpu spinlocks. But the action of the swap
958  * needs its own lock.
959  *
960  * This is defined as an arch_spinlock_t in order to help
961  * with performance when lockdep debugging is enabled.
962  *
963  * It is also used in places other than update_max_tr(),
964  * so it needs to be defined outside of the
965  * CONFIG_TRACER_MAX_TRACE #ifdef.
966  */
967 static arch_spinlock_t ftrace_max_lock =
968         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
969
970 unsigned long __read_mostly     tracing_thresh;
971
972 #ifdef CONFIG_TRACER_MAX_TRACE
973 unsigned long __read_mostly     tracing_max_latency;
974
975 /*
976  * Copy the new maximum trace into the separate maximum-trace
977  * structure. (this way the maximum trace is permanently saved,
978  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979  */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983         struct trace_buffer *trace_buf = &tr->trace_buffer;
984         struct trace_buffer *max_buf = &tr->max_buffer;
985         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988         max_buf->cpu = cpu;
989         max_buf->time_start = data->preempt_timestamp;
990
991         max_data->saved_latency = tracing_max_latency;
992         max_data->critical_start = data->critical_start;
993         max_data->critical_end = data->critical_end;
994
995         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996         max_data->pid = tsk->pid;
997         /*
998          * If tsk == current, then use current_uid(), as that does not use
999          * RCU. The irq tracer can be called out of RCU scope.
1000          */
1001         if (tsk == current)
1002                 max_data->uid = current_uid();
1003         else
1004                 max_data->uid = task_uid(tsk);
1005
1006         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007         max_data->policy = tsk->policy;
1008         max_data->rt_priority = tsk->rt_priority;
1009
1010         /* record this task's comm */
1011         tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016  * @tr: tracer
1017  * @tsk: the task with the latency
1018  * @cpu: The cpu that initiated the trace.
1019  *
1020  * Flip the buffers between the @tr and the max_tr and record information
1021  * about which task was the cause of this latency.
1022  */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026         struct ring_buffer *buf;
1027
1028         if (tr->stop_count)
1029                 return;
1030
1031         WARN_ON_ONCE(!irqs_disabled());
1032
1033         if (!tr->allocated_snapshot) {
1034                 /* Only the nop tracer should hit this when disabling */
1035                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036                 return;
1037         }
1038
1039         arch_spin_lock(&ftrace_max_lock);
1040
1041         buf = tr->trace_buffer.buffer;
1042         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043         tr->max_buffer.buffer = buf;
1044
1045         __update_max_tr(tr, tsk, cpu);
1046         arch_spin_unlock(&ftrace_max_lock);
1047 }
1048
1049 /**
1050  * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr - tracer
1052  * @tsk - task with the latency
1053  * @cpu - the cpu of the buffer to copy.
1054  *
1055  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056  */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060         int ret;
1061
1062         if (tr->stop_count)
1063                 return;
1064
1065         WARN_ON_ONCE(!irqs_disabled());
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&ftrace_max_lock);
1073
1074         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076         if (ret == -EBUSY) {
1077                 /*
1078                  * We failed to swap the buffer due to a commit taking
1079                  * place on this CPU. We fail to record, but we reset
1080                  * the max trace buffer (no one writes directly to it)
1081                  * and flag that it failed.
1082                  */
1083                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084                         "Failed to swap buffers due to commit in progress\n");
1085         }
1086
1087         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089         __update_max_tr(tr, tsk, cpu);
1090         arch_spin_unlock(&ftrace_max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static void default_wait_pipe(struct trace_iterator *iter)
1095 {
1096         /* Iterators are static, they should be filled or empty */
1097         if (trace_buffer_iter(iter, iter->cpu_file))
1098                 return;
1099
1100         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1101 }
1102
1103 #ifdef CONFIG_FTRACE_STARTUP_TEST
1104 static int run_tracer_selftest(struct tracer *type)
1105 {
1106         struct trace_array *tr = &global_trace;
1107         struct tracer *saved_tracer = tr->current_trace;
1108         int ret;
1109
1110         if (!type->selftest || tracing_selftest_disabled)
1111                 return 0;
1112
1113         /*
1114          * Run a selftest on this tracer.
1115          * Here we reset the trace buffer, and set the current
1116          * tracer to be this tracer. The tracer can then run some
1117          * internal tracing to verify that everything is in order.
1118          * If we fail, we do not register this tracer.
1119          */
1120         tracing_reset_online_cpus(&tr->trace_buffer);
1121
1122         tr->current_trace = type;
1123
1124 #ifdef CONFIG_TRACER_MAX_TRACE
1125         if (type->use_max_tr) {
1126                 /* If we expanded the buffers, make sure the max is expanded too */
1127                 if (ring_buffer_expanded)
1128                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1129                                            RING_BUFFER_ALL_CPUS);
1130                 tr->allocated_snapshot = true;
1131         }
1132 #endif
1133
1134         /* the test is responsible for initializing and enabling */
1135         pr_info("Testing tracer %s: ", type->name);
1136         ret = type->selftest(type, tr);
1137         /* the test is responsible for resetting too */
1138         tr->current_trace = saved_tracer;
1139         if (ret) {
1140                 printk(KERN_CONT "FAILED!\n");
1141                 /* Add the warning after printing 'FAILED' */
1142                 WARN_ON(1);
1143                 return -1;
1144         }
1145         /* Only reset on passing, to avoid touching corrupted buffers */
1146         tracing_reset_online_cpus(&tr->trace_buffer);
1147
1148 #ifdef CONFIG_TRACER_MAX_TRACE
1149         if (type->use_max_tr) {
1150                 tr->allocated_snapshot = false;
1151
1152                 /* Shrink the max buffer again */
1153                 if (ring_buffer_expanded)
1154                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1155                                            RING_BUFFER_ALL_CPUS);
1156         }
1157 #endif
1158
1159         printk(KERN_CONT "PASSED\n");
1160         return 0;
1161 }
1162 #else
1163 static inline int run_tracer_selftest(struct tracer *type)
1164 {
1165         return 0;
1166 }
1167 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1168
1169 /**
1170  * register_tracer - register a tracer with the ftrace system.
1171  * @type - the plugin for the tracer
1172  *
1173  * Register a new plugin tracer.
1174  */
1175 int register_tracer(struct tracer *type)
1176 {
1177         struct tracer *t;
1178         int ret = 0;
1179
1180         if (!type->name) {
1181                 pr_info("Tracer must have a name\n");
1182                 return -1;
1183         }
1184
1185         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1186                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1187                 return -1;
1188         }
1189
1190         mutex_lock(&trace_types_lock);
1191
1192         tracing_selftest_running = true;
1193
1194         for (t = trace_types; t; t = t->next) {
1195                 if (strcmp(type->name, t->name) == 0) {
1196                         /* already found */
1197                         pr_info("Tracer %s already registered\n",
1198                                 type->name);
1199                         ret = -1;
1200                         goto out;
1201                 }
1202         }
1203
1204         if (!type->set_flag)
1205                 type->set_flag = &dummy_set_flag;
1206         if (!type->flags)
1207                 type->flags = &dummy_tracer_flags;
1208         else
1209                 if (!type->flags->opts)
1210                         type->flags->opts = dummy_tracer_opt;
1211         if (!type->wait_pipe)
1212                 type->wait_pipe = default_wait_pipe;
1213
1214         ret = run_tracer_selftest(type);
1215         if (ret < 0)
1216                 goto out;
1217
1218         type->next = trace_types;
1219         trace_types = type;
1220
1221  out:
1222         tracing_selftest_running = false;
1223         mutex_unlock(&trace_types_lock);
1224
1225         if (ret || !default_bootup_tracer)
1226                 goto out_unlock;
1227
1228         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1229                 goto out_unlock;
1230
1231         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1232         /* Do we want this tracer to start on bootup? */
1233         tracing_set_tracer(type->name);
1234         default_bootup_tracer = NULL;
1235         /* disable other selftests, since this will break them. */
1236         tracing_selftest_disabled = true;
1237 #ifdef CONFIG_FTRACE_STARTUP_TEST
1238         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1239                type->name);
1240 #endif
1241
1242  out_unlock:
1243         return ret;
1244 }
1245
1246 void tracing_reset(struct trace_buffer *buf, int cpu)
1247 {
1248         struct ring_buffer *buffer = buf->buffer;
1249
1250         if (!buffer)
1251                 return;
1252
1253         ring_buffer_record_disable(buffer);
1254
1255         /* Make sure all commits have finished */
1256         synchronize_sched();
1257         ring_buffer_reset_cpu(buffer, cpu);
1258
1259         ring_buffer_record_enable(buffer);
1260 }
1261
1262 void tracing_reset_online_cpus(struct trace_buffer *buf)
1263 {
1264         struct ring_buffer *buffer = buf->buffer;
1265         int cpu;
1266
1267         if (!buffer)
1268                 return;
1269
1270         ring_buffer_record_disable(buffer);
1271
1272         /* Make sure all commits have finished */
1273         synchronize_sched();
1274
1275         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1276
1277         for_each_online_cpu(cpu)
1278                 ring_buffer_reset_cpu(buffer, cpu);
1279
1280         ring_buffer_record_enable(buffer);
1281 }
1282
1283 /* Must have trace_types_lock held */
1284 void tracing_reset_all_online_cpus(void)
1285 {
1286         struct trace_array *tr;
1287
1288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1289                 tracing_reset_online_cpus(&tr->trace_buffer);
1290 #ifdef CONFIG_TRACER_MAX_TRACE
1291                 tracing_reset_online_cpus(&tr->max_buffer);
1292 #endif
1293         }
1294 }
1295
1296 #define SAVED_CMDLINES 128
1297 #define NO_CMDLINE_MAP UINT_MAX
1298 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1299 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1300 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1301 static int cmdline_idx;
1302 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1303
1304 /* temporarily disable recording */
1305 static atomic_t trace_record_cmdline_disabled __read_mostly;
1306
1307 static void trace_init_cmdlines(void)
1308 {
1309         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1310         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1311         cmdline_idx = 0;
1312 }
1313
1314 int is_tracing_stopped(void)
1315 {
1316         return global_trace.stop_count;
1317 }
1318
1319 /**
1320  * tracing_start - quick start of the tracer
1321  *
1322  * If tracing is enabled but was stopped by tracing_stop,
1323  * this will start the tracer back up.
1324  */
1325 void tracing_start(void)
1326 {
1327         struct ring_buffer *buffer;
1328         unsigned long flags;
1329
1330         if (tracing_disabled)
1331                 return;
1332
1333         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1334         if (--global_trace.stop_count) {
1335                 if (global_trace.stop_count < 0) {
1336                         /* Someone screwed up their debugging */
1337                         WARN_ON_ONCE(1);
1338                         global_trace.stop_count = 0;
1339                 }
1340                 goto out;
1341         }
1342
1343         /* Prevent the buffers from switching */
1344         arch_spin_lock(&ftrace_max_lock);
1345
1346         buffer = global_trace.trace_buffer.buffer;
1347         if (buffer)
1348                 ring_buffer_record_enable(buffer);
1349
1350 #ifdef CONFIG_TRACER_MAX_TRACE
1351         buffer = global_trace.max_buffer.buffer;
1352         if (buffer)
1353                 ring_buffer_record_enable(buffer);
1354 #endif
1355
1356         arch_spin_unlock(&ftrace_max_lock);
1357
1358         ftrace_start();
1359  out:
1360         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1361 }
1362
1363 static void tracing_start_tr(struct trace_array *tr)
1364 {
1365         struct ring_buffer *buffer;
1366         unsigned long flags;
1367
1368         if (tracing_disabled)
1369                 return;
1370
1371         /* If global, we need to also start the max tracer */
1372         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1373                 return tracing_start();
1374
1375         raw_spin_lock_irqsave(&tr->start_lock, flags);
1376
1377         if (--tr->stop_count) {
1378                 if (tr->stop_count < 0) {
1379                         /* Someone screwed up their debugging */
1380                         WARN_ON_ONCE(1);
1381                         tr->stop_count = 0;
1382                 }
1383                 goto out;
1384         }
1385
1386         buffer = tr->trace_buffer.buffer;
1387         if (buffer)
1388                 ring_buffer_record_enable(buffer);
1389
1390  out:
1391         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1392 }
1393
1394 /**
1395  * tracing_stop - quick stop of the tracer
1396  *
1397  * Lightweight way to stop tracing. Use in conjunction with
1398  * tracing_start.
1399  */
1400 void tracing_stop(void)
1401 {
1402         struct ring_buffer *buffer;
1403         unsigned long flags;
1404
1405         ftrace_stop();
1406         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1407         if (global_trace.stop_count++)
1408                 goto out;
1409
1410         /* Prevent the buffers from switching */
1411         arch_spin_lock(&ftrace_max_lock);
1412
1413         buffer = global_trace.trace_buffer.buffer;
1414         if (buffer)
1415                 ring_buffer_record_disable(buffer);
1416
1417 #ifdef CONFIG_TRACER_MAX_TRACE
1418         buffer = global_trace.max_buffer.buffer;
1419         if (buffer)
1420                 ring_buffer_record_disable(buffer);
1421 #endif
1422
1423         arch_spin_unlock(&ftrace_max_lock);
1424
1425  out:
1426         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1427 }
1428
1429 static void tracing_stop_tr(struct trace_array *tr)
1430 {
1431         struct ring_buffer *buffer;
1432         unsigned long flags;
1433
1434         /* If global, we need to also stop the max tracer */
1435         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1436                 return tracing_stop();
1437
1438         raw_spin_lock_irqsave(&tr->start_lock, flags);
1439         if (tr->stop_count++)
1440                 goto out;
1441
1442         buffer = tr->trace_buffer.buffer;
1443         if (buffer)
1444                 ring_buffer_record_disable(buffer);
1445
1446  out:
1447         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1448 }
1449
1450 void trace_stop_cmdline_recording(void);
1451
1452 static void trace_save_cmdline(struct task_struct *tsk)
1453 {
1454         unsigned pid, idx;
1455
1456         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1457                 return;
1458
1459         /*
1460          * It's not the end of the world if we don't get
1461          * the lock, but we also don't want to spin
1462          * nor do we want to disable interrupts,
1463          * so if we miss here, then better luck next time.
1464          */
1465         if (!arch_spin_trylock(&trace_cmdline_lock))
1466                 return;
1467
1468         idx = map_pid_to_cmdline[tsk->pid];
1469         if (idx == NO_CMDLINE_MAP) {
1470                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1471
1472                 /*
1473                  * Check whether the cmdline buffer at idx has a pid
1474                  * mapped. We are going to overwrite that entry so we
1475                  * need to clear the map_pid_to_cmdline. Otherwise we
1476                  * would read the new comm for the old pid.
1477                  */
1478                 pid = map_cmdline_to_pid[idx];
1479                 if (pid != NO_CMDLINE_MAP)
1480                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1481
1482                 map_cmdline_to_pid[idx] = tsk->pid;
1483                 map_pid_to_cmdline[tsk->pid] = idx;
1484
1485                 cmdline_idx = idx;
1486         }
1487
1488         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1489
1490         arch_spin_unlock(&trace_cmdline_lock);
1491 }
1492
1493 void trace_find_cmdline(int pid, char comm[])
1494 {
1495         unsigned map;
1496
1497         if (!pid) {
1498                 strcpy(comm, "<idle>");
1499                 return;
1500         }
1501
1502         if (WARN_ON_ONCE(pid < 0)) {
1503                 strcpy(comm, "<XXX>");
1504                 return;
1505         }
1506
1507         if (pid > PID_MAX_DEFAULT) {
1508                 strcpy(comm, "<...>");
1509                 return;
1510         }
1511
1512         preempt_disable();
1513         arch_spin_lock(&trace_cmdline_lock);
1514         map = map_pid_to_cmdline[pid];
1515         if (map != NO_CMDLINE_MAP)
1516                 strcpy(comm, saved_cmdlines[map]);
1517         else
1518                 strcpy(comm, "<...>");
1519
1520         arch_spin_unlock(&trace_cmdline_lock);
1521         preempt_enable();
1522 }
1523
1524 void tracing_record_cmdline(struct task_struct *tsk)
1525 {
1526         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1527                 return;
1528
1529         if (!__this_cpu_read(trace_cmdline_save))
1530                 return;
1531
1532         __this_cpu_write(trace_cmdline_save, false);
1533
1534         trace_save_cmdline(tsk);
1535 }
1536
1537 void
1538 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1539                              int pc)
1540 {
1541         struct task_struct *tsk = current;
1542
1543         entry->preempt_count            = pc & 0xff;
1544         entry->pid                      = (tsk) ? tsk->pid : 0;
1545         entry->flags =
1546 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1547                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1548 #else
1549                 TRACE_FLAG_IRQS_NOSUPPORT |
1550 #endif
1551                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1552                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1553                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1554                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1557
1558 struct ring_buffer_event *
1559 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1560                           int type,
1561                           unsigned long len,
1562                           unsigned long flags, int pc)
1563 {
1564         struct ring_buffer_event *event;
1565
1566         event = ring_buffer_lock_reserve(buffer, len);
1567         if (event != NULL) {
1568                 struct trace_entry *ent = ring_buffer_event_data(event);
1569
1570                 tracing_generic_entry_update(ent, flags, pc);
1571                 ent->type = type;
1572         }
1573
1574         return event;
1575 }
1576
1577 void
1578 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1579 {
1580         __this_cpu_write(trace_cmdline_save, true);
1581         ring_buffer_unlock_commit(buffer, event);
1582 }
1583
1584 static inline void
1585 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1586                              struct ring_buffer_event *event,
1587                              unsigned long flags, int pc)
1588 {
1589         __buffer_unlock_commit(buffer, event);
1590
1591         ftrace_trace_stack(buffer, flags, 6, pc);
1592         ftrace_trace_userstack(buffer, flags, pc);
1593 }
1594
1595 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1596                                 struct ring_buffer_event *event,
1597                                 unsigned long flags, int pc)
1598 {
1599         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1600 }
1601 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1602
1603 static struct ring_buffer *temp_buffer;
1604
1605 struct ring_buffer_event *
1606 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1607                           struct ftrace_event_file *ftrace_file,
1608                           int type, unsigned long len,
1609                           unsigned long flags, int pc)
1610 {
1611         struct ring_buffer_event *entry;
1612
1613         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1614         entry = trace_buffer_lock_reserve(*current_rb,
1615                                          type, len, flags, pc);
1616         /*
1617          * If tracing is off, but we have triggers enabled,
1618          * we still need to look at the event data. Use the temp_buffer
1619          * to store the trace event for the trigger to use. It's recursion
1620          * safe and will not be recorded anywhere.
1621          */
1622         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1623                 *current_rb = temp_buffer;
1624                 entry = trace_buffer_lock_reserve(*current_rb,
1625                                                   type, len, flags, pc);
1626         }
1627         return entry;
1628 }
1629 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1630
1631 struct ring_buffer_event *
1632 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1633                                   int type, unsigned long len,
1634                                   unsigned long flags, int pc)
1635 {
1636         *current_rb = global_trace.trace_buffer.buffer;
1637         return trace_buffer_lock_reserve(*current_rb,
1638                                          type, len, flags, pc);
1639 }
1640 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1641
1642 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1643                                         struct ring_buffer_event *event,
1644                                         unsigned long flags, int pc)
1645 {
1646         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1647 }
1648 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1649
1650 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1651                                      struct ring_buffer_event *event,
1652                                      unsigned long flags, int pc,
1653                                      struct pt_regs *regs)
1654 {
1655         __buffer_unlock_commit(buffer, event);
1656
1657         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1658         ftrace_trace_userstack(buffer, flags, pc);
1659 }
1660 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1661
1662 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1663                                          struct ring_buffer_event *event)
1664 {
1665         ring_buffer_discard_commit(buffer, event);
1666 }
1667 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1668
1669 void
1670 trace_function(struct trace_array *tr,
1671                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1672                int pc)
1673 {
1674         struct ftrace_event_call *call = &event_function;
1675         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1676         struct ring_buffer_event *event;
1677         struct ftrace_entry *entry;
1678
1679         /* If we are reading the ring buffer, don't trace */
1680         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1681                 return;
1682
1683         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1684                                           flags, pc);
1685         if (!event)
1686                 return;
1687         entry   = ring_buffer_event_data(event);
1688         entry->ip                       = ip;
1689         entry->parent_ip                = parent_ip;
1690
1691         if (!call_filter_check_discard(call, entry, buffer, event))
1692                 __buffer_unlock_commit(buffer, event);
1693 }
1694
1695 #ifdef CONFIG_STACKTRACE
1696
1697 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1698 struct ftrace_stack {
1699         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1700 };
1701
1702 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1703 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1704
1705 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1706                                  unsigned long flags,
1707                                  int skip, int pc, struct pt_regs *regs)
1708 {
1709         struct ftrace_event_call *call = &event_kernel_stack;
1710         struct ring_buffer_event *event;
1711         struct stack_entry *entry;
1712         struct stack_trace trace;
1713         int use_stack;
1714         int size = FTRACE_STACK_ENTRIES;
1715
1716         trace.nr_entries        = 0;
1717         trace.skip              = skip;
1718
1719         /*
1720          * Since events can happen in NMIs there's no safe way to
1721          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1722          * or NMI comes in while it is in use, that context just falls back to
1723          * the default FTRACE_STACK_ENTRIES-sized stack in the entry itself.
1724          */
1725         preempt_disable_notrace();
1726
1727         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1728         /*
1729          * We don't need any atomic variables, just a barrier.
1730          * If an interrupt comes in, we don't care, because it would
1731          * have exited and put the counter back to what we want.
1732          * We just need a barrier to keep gcc from moving things
1733          * around.
1734          */
1735         barrier();
1736         if (use_stack == 1) {
1737                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1738                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1739
1740                 if (regs)
1741                         save_stack_trace_regs(regs, &trace);
1742                 else
1743                         save_stack_trace(&trace);
1744
1745                 if (trace.nr_entries > size)
1746                         size = trace.nr_entries;
1747         } else
1748                 /* From now on, use_stack is a boolean */
1749                 use_stack = 0;
1750
1751         size *= sizeof(unsigned long);
1752
1753         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1754                                           sizeof(*entry) + size, flags, pc);
1755         if (!event)
1756                 goto out;
1757         entry = ring_buffer_event_data(event);
1758
1759         memset(&entry->caller, 0, size);
1760
1761         if (use_stack)
1762                 memcpy(&entry->caller, trace.entries,
1763                        trace.nr_entries * sizeof(unsigned long));
1764         else {
1765                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1766                 trace.entries           = entry->caller;
1767                 if (regs)
1768                         save_stack_trace_regs(regs, &trace);
1769                 else
1770                         save_stack_trace(&trace);
1771         }
1772
1773         entry->size = trace.nr_entries;
1774
1775         if (!call_filter_check_discard(call, entry, buffer, event))
1776                 __buffer_unlock_commit(buffer, event);
1777
1778  out:
1779         /* Again, don't let gcc optimize things here */
1780         barrier();
1781         __this_cpu_dec(ftrace_stack_reserve);
1782         preempt_enable_notrace();
1783
1784 }
1785
1786 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1787                              int skip, int pc, struct pt_regs *regs)
1788 {
1789         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1790                 return;
1791
1792         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1793 }
1794
1795 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1796                         int skip, int pc)
1797 {
1798         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1799                 return;
1800
1801         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1802 }
1803
1804 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1805                    int pc)
1806 {
1807         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1808 }
1809
1810 /**
1811  * trace_dump_stack - record a stack back trace in the trace buffer
1812  * @skip: Number of functions to skip (helper handlers)
1813  */
1814 void trace_dump_stack(int skip)
1815 {
1816         unsigned long flags;
1817
1818         if (tracing_disabled || tracing_selftest_running)
1819                 return;
1820
1821         local_save_flags(flags);
1822
1823         /*
1824          * Skip 3 more frames, which seems to get us to the caller of
1825          * this function.
1826          */
1827         skip += 3;
1828         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1829                              flags, skip, preempt_count(), NULL);
1830 }
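/*
 * Illustrative usage (not part of this file): a debugging call site can
 * record its own backtrace into the trace buffer with:
 *
 *	trace_dump_stack(0);
 *
 * A non-zero skip drops that many additional callers from the top of
 * the recorded stack.
 */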
1831
1832 static DEFINE_PER_CPU(int, user_stack_count);
1833
1834 void
1835 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1836 {
1837         struct ftrace_event_call *call = &event_user_stack;
1838         struct ring_buffer_event *event;
1839         struct userstack_entry *entry;
1840         struct stack_trace trace;
1841
1842         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1843                 return;
1844
1845         /*
1846          * NMIs cannot handle page faults, even with fixups.
1847          * Saving the user stack can (and often does) fault.
1848          */
1849         if (unlikely(in_nmi()))
1850                 return;
1851
1852         /*
1853          * prevent recursion, since the user stack tracing may
1854          * trigger other kernel events.
1855          */
1856         preempt_disable();
1857         if (__this_cpu_read(user_stack_count))
1858                 goto out;
1859
1860         __this_cpu_inc(user_stack_count);
1861
1862         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1863                                           sizeof(*entry), flags, pc);
1864         if (!event)
1865                 goto out_drop_count;
1866         entry   = ring_buffer_event_data(event);
1867
1868         entry->tgid             = current->tgid;
1869         memset(&entry->caller, 0, sizeof(entry->caller));
1870
1871         trace.nr_entries        = 0;
1872         trace.max_entries       = FTRACE_STACK_ENTRIES;
1873         trace.skip              = 0;
1874         trace.entries           = entry->caller;
1875
1876         save_stack_trace_user(&trace);
1877         if (!call_filter_check_discard(call, entry, buffer, event))
1878                 __buffer_unlock_commit(buffer, event);
1879
1880  out_drop_count:
1881         __this_cpu_dec(user_stack_count);
1882  out:
1883         preempt_enable();
1884 }
1885
1886 #ifdef UNUSED
1887 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1888 {
1889         ftrace_trace_userstack(tr, flags, preempt_count());
1890 }
1891 #endif /* UNUSED */
1892
1893 #endif /* CONFIG_STACKTRACE */
1894
1895 /* created for use with alloc_percpu */
1896 struct trace_buffer_struct {
1897         char buffer[TRACE_BUF_SIZE];
1898 };
1899
1900 static struct trace_buffer_struct *trace_percpu_buffer;
1901 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1902 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1903 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1904
1905 /*
1906  * The buffer used is dependent on the context. There is a per cpu
1907  * buffer for normal context, softirq context, hard irq context and
1908  * for NMI context. This allows for lockless recording.
1909  *
1910  * Note, if the buffers failed to be allocated, then this returns NULL.
1911  */
1912 static char *get_trace_buf(void)
1913 {
1914         struct trace_buffer_struct *percpu_buffer;
1915
1916         /*
1917          * If we have allocated per cpu buffers, then we do not
1918          * need to do any locking.
1919          */
1920         if (in_nmi())
1921                 percpu_buffer = trace_percpu_nmi_buffer;
1922         else if (in_irq())
1923                 percpu_buffer = trace_percpu_irq_buffer;
1924         else if (in_softirq())
1925                 percpu_buffer = trace_percpu_sirq_buffer;
1926         else
1927                 percpu_buffer = trace_percpu_buffer;
1928
1929         if (!percpu_buffer)
1930                 return NULL;
1931
1932         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1933 }
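/*
 * Illustrative sketch (not part of this file): callers pair get_trace_buf()
 * with disabled preemption so the per cpu buffer cannot change underneath
 * them, as trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer)
 *		len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *	preempt_enable_notrace();
 */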
1934
1935 static int alloc_percpu_trace_buffer(void)
1936 {
1937         struct trace_buffer_struct *buffers;
1938         struct trace_buffer_struct *sirq_buffers;
1939         struct trace_buffer_struct *irq_buffers;
1940         struct trace_buffer_struct *nmi_buffers;
1941
1942         buffers = alloc_percpu(struct trace_buffer_struct);
1943         if (!buffers)
1944                 goto err_warn;
1945
1946         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1947         if (!sirq_buffers)
1948                 goto err_sirq;
1949
1950         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1951         if (!irq_buffers)
1952                 goto err_irq;
1953
1954         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1955         if (!nmi_buffers)
1956                 goto err_nmi;
1957
1958         trace_percpu_buffer = buffers;
1959         trace_percpu_sirq_buffer = sirq_buffers;
1960         trace_percpu_irq_buffer = irq_buffers;
1961         trace_percpu_nmi_buffer = nmi_buffers;
1962
1963         return 0;
1964
1965  err_nmi:
1966         free_percpu(irq_buffers);
1967  err_irq:
1968         free_percpu(sirq_buffers);
1969  err_sirq:
1970         free_percpu(buffers);
1971  err_warn:
1972         WARN(1, "Could not allocate percpu trace_printk buffer");
1973         return -ENOMEM;
1974 }
1975
1976 static int buffers_allocated;
1977
1978 void trace_printk_init_buffers(void)
1979 {
1980         if (buffers_allocated)
1981                 return;
1982
1983         if (alloc_percpu_trace_buffer())
1984                 return;
1985
1986         pr_info("ftrace: Allocated trace_printk buffers\n");
1987
1988         /* Expand the buffers to the set size */
1989         tracing_update_buffers();
1990
1991         buffers_allocated = 1;
1992
1993         /*
1994          * trace_printk_init_buffers() can be called by modules.
1995          * If that happens, then we need to start cmdline recording
1996          * directly here. If global_trace.trace_buffer.buffer is already
1997          * allocated here, then this was called by module code.
1998          */
1999         if (global_trace.trace_buffer.buffer)
2000                 tracing_start_cmdline_record();
2001 }
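/*
 * Illustrative note (not part of this file): drivers and modules do not
 * call trace_printk_init_buffers() directly; the tracing core allocates
 * these buffers when it sees trace_printk() users, at boot or at module
 * load time, so a caller only needs something like:
 *
 *	trace_printk("resetting device %d\n", dev_id);
 *
 * (dev_id is a placeholder for this example.)
 */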
2002
2003 void trace_printk_start_comm(void)
2004 {
2005         /* Start tracing comms if trace printk is set */
2006         if (!buffers_allocated)
2007                 return;
2008         tracing_start_cmdline_record();
2009 }
2010
2011 static void trace_printk_start_stop_comm(int enabled)
2012 {
2013         if (!buffers_allocated)
2014                 return;
2015
2016         if (enabled)
2017                 tracing_start_cmdline_record();
2018         else
2019                 tracing_stop_cmdline_record();
2020 }
2021
2022 /**
2023  * trace_vbprintk - write a binary printk message to the tracing buffer
2024  *
2025  */
2026 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2027 {
2028         struct ftrace_event_call *call = &event_bprint;
2029         struct ring_buffer_event *event;
2030         struct ring_buffer *buffer;
2031         struct trace_array *tr = &global_trace;
2032         struct bprint_entry *entry;
2033         unsigned long flags;
2034         char *tbuffer;
2035         int len = 0, size, pc;
2036
2037         if (unlikely(tracing_selftest_running || tracing_disabled))
2038                 return 0;
2039
2040         /* Don't pollute graph traces with trace_vprintk internals */
2041         pause_graph_tracing();
2042
2043         pc = preempt_count();
2044         preempt_disable_notrace();
2045
2046         tbuffer = get_trace_buf();
2047         if (!tbuffer) {
2048                 len = 0;
2049                 goto out;
2050         }
2051
2052         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2053
2054         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2055                 goto out;
2056
2057         local_save_flags(flags);
2058         size = sizeof(*entry) + sizeof(u32) * len;
2059         buffer = tr->trace_buffer.buffer;
2060         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2061                                           flags, pc);
2062         if (!event)
2063                 goto out;
2064         entry = ring_buffer_event_data(event);
2065         entry->ip                       = ip;
2066         entry->fmt                      = fmt;
2067
2068         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2069         if (!call_filter_check_discard(call, entry, buffer, event)) {
2070                 __buffer_unlock_commit(buffer, event);
2071                 ftrace_trace_stack(buffer, flags, 6, pc);
2072         }
2073
2074 out:
2075         preempt_enable_notrace();
2076         unpause_graph_tracing();
2077
2078         return len;
2079 }
2080 EXPORT_SYMBOL_GPL(trace_vbprintk);
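/*
 * Illustrative note (not part of this file): trace_vbprintk() is normally
 * reached through the trace_printk() macro when the format string is a
 * compile-time constant. Only the format pointer and the binary arguments
 * are stored here; the text is rendered when the trace file is read:
 *
 *	trace_printk("id=%d addr=%p\n", id, addr);
 */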
2081
2082 static int
2083 __trace_array_vprintk(struct ring_buffer *buffer,
2084                       unsigned long ip, const char *fmt, va_list args)
2085 {
2086         struct ftrace_event_call *call = &event_print;
2087         struct ring_buffer_event *event;
2088         int len = 0, size, pc;
2089         struct print_entry *entry;
2090         unsigned long flags;
2091         char *tbuffer;
2092
2093         if (tracing_disabled || tracing_selftest_running)
2094                 return 0;
2095
2096         /* Don't pollute graph traces with trace_vprintk internals */
2097         pause_graph_tracing();
2098
2099         pc = preempt_count();
2100         preempt_disable_notrace();
2101
2102
2103         tbuffer = get_trace_buf();
2104         if (!tbuffer) {
2105                 len = 0;
2106                 goto out;
2107         }
2108
2109         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2110         if (len > TRACE_BUF_SIZE)
2111                 goto out;
2112
2113         local_save_flags(flags);
2114         size = sizeof(*entry) + len + 1;
2115         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2116                                           flags, pc);
2117         if (!event)
2118                 goto out;
2119         entry = ring_buffer_event_data(event);
2120         entry->ip = ip;
2121
2122         memcpy(&entry->buf, tbuffer, len);
2123         entry->buf[len] = '\0';
2124         if (!call_filter_check_discard(call, entry, buffer, event)) {
2125                 __buffer_unlock_commit(buffer, event);
2126                 ftrace_trace_stack(buffer, flags, 6, pc);
2127         }
2128  out:
2129         preempt_enable_notrace();
2130         unpause_graph_tracing();
2131
2132         return len;
2133 }
2134
2135 int trace_array_vprintk(struct trace_array *tr,
2136                         unsigned long ip, const char *fmt, va_list args)
2137 {
2138         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2139 }
2140
2141 int trace_array_printk(struct trace_array *tr,
2142                        unsigned long ip, const char *fmt, ...)
2143 {
2144         int ret;
2145         va_list ap;
2146
2147         if (!(trace_flags & TRACE_ITER_PRINTK))
2148                 return 0;
2149
2150         va_start(ap, fmt);
2151         ret = trace_array_vprintk(tr, ip, fmt, ap);
2152         va_end(ap);
2153         return ret;
2154 }
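/*
 * Illustrative sketch (not part of this file): given a trace_array pointer
 * obtained elsewhere (my_tr below is hypothetical), callers write into that
 * instance's buffer rather than the global one:
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "queue depth %d\n", depth);
 */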
2155
2156 int trace_array_printk_buf(struct ring_buffer *buffer,
2157                            unsigned long ip, const char *fmt, ...)
2158 {
2159         int ret;
2160         va_list ap;
2161
2162         if (!(trace_flags & TRACE_ITER_PRINTK))
2163                 return 0;
2164
2165         va_start(ap, fmt);
2166         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2167         va_end(ap);
2168         return ret;
2169 }
2170
2171 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2172 {
2173         return trace_array_vprintk(&global_trace, ip, fmt, args);
2174 }
2175 EXPORT_SYMBOL_GPL(trace_vprintk);
2176
2177 static void trace_iterator_increment(struct trace_iterator *iter)
2178 {
2179         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2180
2181         iter->idx++;
2182         if (buf_iter)
2183                 ring_buffer_read(buf_iter, NULL);
2184 }
2185
2186 static struct trace_entry *
2187 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2188                 unsigned long *lost_events)
2189 {
2190         struct ring_buffer_event *event;
2191         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2192
2193         if (buf_iter)
2194                 event = ring_buffer_iter_peek(buf_iter, ts);
2195         else
2196                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2197                                          lost_events);
2198
2199         if (event) {
2200                 iter->ent_size = ring_buffer_event_length(event);
2201                 return ring_buffer_event_data(event);
2202         }
2203         iter->ent_size = 0;
2204         return NULL;
2205 }
2206
2207 static struct trace_entry *
2208 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2209                   unsigned long *missing_events, u64 *ent_ts)
2210 {
2211         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2212         struct trace_entry *ent, *next = NULL;
2213         unsigned long lost_events = 0, next_lost = 0;
2214         int cpu_file = iter->cpu_file;
2215         u64 next_ts = 0, ts;
2216         int next_cpu = -1;
2217         int next_size = 0;
2218         int cpu;
2219
2220         /*
2221          * If we are in a per_cpu trace file, don't bother iterating over
2222          * all cpus; peek at the requested cpu directly.
2223          */
2224         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2225                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2226                         return NULL;
2227                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2228                 if (ent_cpu)
2229                         *ent_cpu = cpu_file;
2230
2231                 return ent;
2232         }
2233
2234         for_each_tracing_cpu(cpu) {
2235
2236                 if (ring_buffer_empty_cpu(buffer, cpu))
2237                         continue;
2238
2239                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2240
2241                 /*
2242                  * Pick the entry with the smallest timestamp:
2243                  */
2244                 if (ent && (!next || ts < next_ts)) {
2245                         next = ent;
2246                         next_cpu = cpu;
2247                         next_ts = ts;
2248                         next_lost = lost_events;
2249                         next_size = iter->ent_size;
2250                 }
2251         }
2252
2253         iter->ent_size = next_size;
2254
2255         if (ent_cpu)
2256                 *ent_cpu = next_cpu;
2257
2258         if (ent_ts)
2259                 *ent_ts = next_ts;
2260
2261         if (missing_events)
2262                 *missing_events = next_lost;
2263
2264         return next;
2265 }
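/*
 * Worked example (illustrative): if the next entries per cpu carry
 * timestamps cpu0=105, cpu1=98 and cpu2 is empty, the loop above returns
 * the cpu1 entry with next_cpu=1 and next_ts=98; the following call then
 * continues the merge from whatever each cpu exposes next.
 */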
2266
2267 /* Find the next real entry, without updating the iterator itself */
2268 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2269                                           int *ent_cpu, u64 *ent_ts)
2270 {
2271         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2272 }
2273
2274 /* Find the next real entry, and increment the iterator to the next entry */
2275 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2276 {
2277         iter->ent = __find_next_entry(iter, &iter->cpu,
2278                                       &iter->lost_events, &iter->ts);
2279
2280         if (iter->ent)
2281                 trace_iterator_increment(iter);
2282
2283         return iter->ent ? iter : NULL;
2284 }
2285
2286 static void trace_consume(struct trace_iterator *iter)
2287 {
2288         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2289                             &iter->lost_events);
2290 }
2291
2292 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2293 {
2294         struct trace_iterator *iter = m->private;
2295         int i = (int)*pos;
2296         void *ent;
2297
2298         WARN_ON_ONCE(iter->leftover);
2299
2300         (*pos)++;
2301
2302         /* can't go backwards */
2303         if (iter->idx > i)
2304                 return NULL;
2305
2306         if (iter->idx < 0)
2307                 ent = trace_find_next_entry_inc(iter);
2308         else
2309                 ent = iter;
2310
2311         while (ent && iter->idx < i)
2312                 ent = trace_find_next_entry_inc(iter);
2313
2314         iter->pos = *pos;
2315
2316         return ent;
2317 }
2318
2319 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2320 {
2321         struct ring_buffer_event *event;
2322         struct ring_buffer_iter *buf_iter;
2323         unsigned long entries = 0;
2324         u64 ts;
2325
2326         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2327
2328         buf_iter = trace_buffer_iter(iter, cpu);
2329         if (!buf_iter)
2330                 return;
2331
2332         ring_buffer_iter_reset(buf_iter);
2333
2334         /*
2335          * We could have the case with the max latency tracers
2336          * that a reset never took place on a cpu. This is evident
2337          * by the timestamp being before the start of the buffer.
2338          */
2339         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2340                 if (ts >= iter->trace_buffer->time_start)
2341                         break;
2342                 entries++;
2343                 ring_buffer_read(buf_iter, NULL);
2344         }
2345
2346         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2347 }
2348
2349 /*
2350  * The current tracer is copied to avoid taking a global lock
2351  * all around.
2352  */
2353 static void *s_start(struct seq_file *m, loff_t *pos)
2354 {
2355         struct trace_iterator *iter = m->private;
2356         struct trace_array *tr = iter->tr;
2357         int cpu_file = iter->cpu_file;
2358         void *p = NULL;
2359         loff_t l = 0;
2360         int cpu;
2361
2362         /*
2363          * copy the tracer to avoid using a global lock all around.
2364          * iter->trace is a copy of current_trace; the pointer to the
2365          * name may be used instead of a strcmp(), as iter->trace->name
2366          * will point to the same string as current_trace->name.
2367          */
2368         mutex_lock(&trace_types_lock);
2369         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2370                 *iter->trace = *tr->current_trace;
2371         mutex_unlock(&trace_types_lock);
2372
2373 #ifdef CONFIG_TRACER_MAX_TRACE
2374         if (iter->snapshot && iter->trace->use_max_tr)
2375                 return ERR_PTR(-EBUSY);
2376 #endif
2377
2378         if (!iter->snapshot)
2379                 atomic_inc(&trace_record_cmdline_disabled);
2380
2381         if (*pos != iter->pos) {
2382                 iter->ent = NULL;
2383                 iter->cpu = 0;
2384                 iter->idx = -1;
2385
2386                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2387                         for_each_tracing_cpu(cpu)
2388                                 tracing_iter_reset(iter, cpu);
2389                 } else
2390                         tracing_iter_reset(iter, cpu_file);
2391
2392                 iter->leftover = 0;
2393                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2394                         ;
2395
2396         } else {
2397                 /*
2398                  * If we overflowed the seq_file before, then we want
2399                  * to just reuse the trace_seq buffer again.
2400                  */
2401                 if (iter->leftover)
2402                         p = iter;
2403                 else {
2404                         l = *pos - 1;
2405                         p = s_next(m, p, &l);
2406                 }
2407         }
2408
2409         trace_event_read_lock();
2410         trace_access_lock(cpu_file);
2411         return p;
2412 }
2413
2414 static void s_stop(struct seq_file *m, void *p)
2415 {
2416         struct trace_iterator *iter = m->private;
2417
2418 #ifdef CONFIG_TRACER_MAX_TRACE
2419         if (iter->snapshot && iter->trace->use_max_tr)
2420                 return;
2421 #endif
2422
2423         if (!iter->snapshot)
2424                 atomic_dec(&trace_record_cmdline_disabled);
2425
2426         trace_access_unlock(iter->cpu_file);
2427         trace_event_read_unlock();
2428 }
2429
2430 static void
2431 get_total_entries(struct trace_buffer *buf,
2432                   unsigned long *total, unsigned long *entries)
2433 {
2434         unsigned long count;
2435         int cpu;
2436
2437         *total = 0;
2438         *entries = 0;
2439
2440         for_each_tracing_cpu(cpu) {
2441                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2442                 /*
2443                  * If this buffer has skipped entries, then it holds all
2444                  * entries for the trace and we need to ignore the
2445                  * ones before the time stamp.
2446                  */
2447                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2448                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2449                         /* total is the same as the entries */
2450                         *total += count;
2451                 } else
2452                         *total += count +
2453                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2454                 *entries += count;
2455         }
2456 }
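/*
 * Worked example (illustrative): a cpu buffer holding 400 entries with
 * 100 overruns contributes entries=400 and total=500.  If that cpu
 * instead had skipped_entries=50 from a latency-tracer reset, it
 * contributes entries=350 and total=350, since such a buffer still
 * holds every entry for the trace.
 */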
2457
2458 static void print_lat_help_header(struct seq_file *m)
2459 {
2460         seq_puts(m, "#                  _------=> CPU#            \n");
2461         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2462         seq_puts(m, "#                | / _----=> need-resched    \n");
2463         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2464         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2465         seq_puts(m, "#                |||| /     delay             \n");
2466         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2467         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2468 }
2469
2470 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2471 {
2472         unsigned long total;
2473         unsigned long entries;
2474
2475         get_total_entries(buf, &total, &entries);
2476         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2477                    entries, total, num_online_cpus());
2478         seq_puts(m, "#\n");
2479 }
2480
2481 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2482 {
2483         print_event_info(buf, m);
2484         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2485         seq_puts(m, "#              | |       |          |         |\n");
2486 }
2487
2488 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2489 {
2490         print_event_info(buf, m);
2491         seq_puts(m, "#                              _-----=> irqs-off\n");
2492         seq_puts(m, "#                             / _----=> need-resched\n");
2493         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2494         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2495         seq_puts(m, "#                            ||| /     delay\n");
2496         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2497         seq_puts(m, "#              | |       |   ||||       |         |\n");
2498 }
2499
2500 void
2501 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2502 {
2503         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2504         struct trace_buffer *buf = iter->trace_buffer;
2505         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2506         struct tracer *type = iter->trace;
2507         unsigned long entries;
2508         unsigned long total;
2509         const char *name = "preemption";
2510
2511         name = type->name;
2512
2513         get_total_entries(buf, &total, &entries);
2514
2515         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2516                    name, UTS_RELEASE);
2517         seq_puts(m, "# -----------------------------------"
2518                  "---------------------------------\n");
2519         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2520                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2521                    nsecs_to_usecs(data->saved_latency),
2522                    entries,
2523                    total,
2524                    buf->cpu,
2525 #if defined(CONFIG_PREEMPT_NONE)
2526                    "server",
2527 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2528                    "desktop",
2529 #elif defined(CONFIG_PREEMPT)
2530                    "preempt",
2531 #else
2532                    "unknown",
2533 #endif
2534                    /* These are reserved for later use */
2535                    0, 0, 0, 0);
2536 #ifdef CONFIG_SMP
2537         seq_printf(m, " #P:%d)\n", num_online_cpus());
2538 #else
2539         seq_puts(m, ")\n");
2540 #endif
2541         seq_puts(m, "#    -----------------\n");
2542         seq_printf(m, "#    | task: %.16s-%d "
2543                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2544                    data->comm, data->pid,
2545                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2546                    data->policy, data->rt_priority);
2547         seq_puts(m, "#    -----------------\n");
2548
2549         if (data->critical_start) {
2550                 seq_puts(m, "#  => started at: ");
2551                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2552                 trace_print_seq(m, &iter->seq);
2553                 seq_puts(m, "\n#  => ended at:   ");
2554                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2555                 trace_print_seq(m, &iter->seq);
2556                 seq_puts(m, "\n#\n");
2557         }
2558
2559         seq_puts(m, "#\n");
2560 }
2561
2562 static void test_cpu_buff_start(struct trace_iterator *iter)
2563 {
2564         struct trace_seq *s = &iter->seq;
2565
2566         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2567                 return;
2568
2569         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2570                 return;
2571
2572         if (cpumask_test_cpu(iter->cpu, iter->started))
2573                 return;
2574
2575         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2576                 return;
2577
2578         cpumask_set_cpu(iter->cpu, iter->started);
2579
2580         /* Don't print started cpu buffer for the first entry of the trace */
2581         if (iter->idx > 1)
2582                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2583                                 iter->cpu);
2584 }
2585
2586 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2587 {
2588         struct trace_seq *s = &iter->seq;
2589         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2590         struct trace_entry *entry;
2591         struct trace_event *event;
2592
2593         entry = iter->ent;
2594
2595         test_cpu_buff_start(iter);
2596
2597         event = ftrace_find_event(entry->type);
2598
2599         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2600                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2601                         if (!trace_print_lat_context(iter))
2602                                 goto partial;
2603                 } else {
2604                         if (!trace_print_context(iter))
2605                                 goto partial;
2606                 }
2607         }
2608
2609         if (event)
2610                 return event->funcs->trace(iter, sym_flags, event);
2611
2612         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2613                 goto partial;
2614
2615         return TRACE_TYPE_HANDLED;
2616 partial:
2617         return TRACE_TYPE_PARTIAL_LINE;
2618 }
2619
2620 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2621 {
2622         struct trace_seq *s = &iter->seq;
2623         struct trace_entry *entry;
2624         struct trace_event *event;
2625
2626         entry = iter->ent;
2627
2628         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2629                 if (!trace_seq_printf(s, "%d %d %llu ",
2630                                       entry->pid, iter->cpu, iter->ts))
2631                         goto partial;
2632         }
2633
2634         event = ftrace_find_event(entry->type);
2635         if (event)
2636                 return event->funcs->raw(iter, 0, event);
2637
2638         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2639                 goto partial;
2640
2641         return TRACE_TYPE_HANDLED;
2642 partial:
2643         return TRACE_TYPE_PARTIAL_LINE;
2644 }
2645
2646 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2647 {
2648         struct trace_seq *s = &iter->seq;
2649         unsigned char newline = '\n';
2650         struct trace_entry *entry;
2651         struct trace_event *event;
2652
2653         entry = iter->ent;
2654
2655         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2656                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2657                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2658                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2659         }
2660
2661         event = ftrace_find_event(entry->type);
2662         if (event) {
2663                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2664                 if (ret != TRACE_TYPE_HANDLED)
2665                         return ret;
2666         }
2667
2668         SEQ_PUT_FIELD_RET(s, newline);
2669
2670         return TRACE_TYPE_HANDLED;
2671 }
2672
2673 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2674 {
2675         struct trace_seq *s = &iter->seq;
2676         struct trace_entry *entry;
2677         struct trace_event *event;
2678
2679         entry = iter->ent;
2680
2681         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2682                 SEQ_PUT_FIELD_RET(s, entry->pid);
2683                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2684                 SEQ_PUT_FIELD_RET(s, iter->ts);
2685         }
2686
2687         event = ftrace_find_event(entry->type);
2688         return event ? event->funcs->binary(iter, 0, event) :
2689                 TRACE_TYPE_HANDLED;
2690 }
2691
2692 int trace_empty(struct trace_iterator *iter)
2693 {
2694         struct ring_buffer_iter *buf_iter;
2695         int cpu;
2696
2697         /* If we are looking at one CPU buffer, only check that one */
2698         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2699                 cpu = iter->cpu_file;
2700                 buf_iter = trace_buffer_iter(iter, cpu);
2701                 if (buf_iter) {
2702                         if (!ring_buffer_iter_empty(buf_iter))
2703                                 return 0;
2704                 } else {
2705                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2706                                 return 0;
2707                 }
2708                 return 1;
2709         }
2710
2711         for_each_tracing_cpu(cpu) {
2712                 buf_iter = trace_buffer_iter(iter, cpu);
2713                 if (buf_iter) {
2714                         if (!ring_buffer_iter_empty(buf_iter))
2715                                 return 0;
2716                 } else {
2717                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2718                                 return 0;
2719                 }
2720         }
2721
2722         return 1;
2723 }
2724
2725 /*  Called with trace_event_read_lock() held. */
2726 enum print_line_t print_trace_line(struct trace_iterator *iter)
2727 {
2728         enum print_line_t ret;
2729
2730         if (iter->lost_events &&
2731             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2732                                  iter->cpu, iter->lost_events))
2733                 return TRACE_TYPE_PARTIAL_LINE;
2734
2735         if (iter->trace && iter->trace->print_line) {
2736                 ret = iter->trace->print_line(iter);
2737                 if (ret != TRACE_TYPE_UNHANDLED)
2738                         return ret;
2739         }
2740
2741         if (iter->ent->type == TRACE_BPUTS &&
2742                         trace_flags & TRACE_ITER_PRINTK &&
2743                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2744                 return trace_print_bputs_msg_only(iter);
2745
2746         if (iter->ent->type == TRACE_BPRINT &&
2747                         trace_flags & TRACE_ITER_PRINTK &&
2748                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2749                 return trace_print_bprintk_msg_only(iter);
2750
2751         if (iter->ent->type == TRACE_PRINT &&
2752                         trace_flags & TRACE_ITER_PRINTK &&
2753                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2754                 return trace_print_printk_msg_only(iter);
2755
2756         if (trace_flags & TRACE_ITER_BIN)
2757                 return print_bin_fmt(iter);
2758
2759         if (trace_flags & TRACE_ITER_HEX)
2760                 return print_hex_fmt(iter);
2761
2762         if (trace_flags & TRACE_ITER_RAW)
2763                 return print_raw_fmt(iter);
2764
2765         return print_trace_fmt(iter);
2766 }
2767
2768 void trace_latency_header(struct seq_file *m)
2769 {
2770         struct trace_iterator *iter = m->private;
2771
2772         /* print nothing if the buffers are empty */
2773         if (trace_empty(iter))
2774                 return;
2775
2776         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2777                 print_trace_header(m, iter);
2778
2779         if (!(trace_flags & TRACE_ITER_VERBOSE))
2780                 print_lat_help_header(m);
2781 }
2782
2783 void trace_default_header(struct seq_file *m)
2784 {
2785         struct trace_iterator *iter = m->private;
2786
2787         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2788                 return;
2789
2790         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2791                 /* print nothing if the buffers are empty */
2792                 if (trace_empty(iter))
2793                         return;
2794                 print_trace_header(m, iter);
2795                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2796                         print_lat_help_header(m);
2797         } else {
2798                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2799                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2800                                 print_func_help_header_irq(iter->trace_buffer, m);
2801                         else
2802                                 print_func_help_header(iter->trace_buffer, m);
2803                 }
2804         }
2805 }
2806
2807 static void test_ftrace_alive(struct seq_file *m)
2808 {
2809         if (!ftrace_is_dead())
2810                 return;
2811         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2812         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2813 }
2814
2815 #ifdef CONFIG_TRACER_MAX_TRACE
2816 static void show_snapshot_main_help(struct seq_file *m)
2817 {
2818         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2819         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2820         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2821         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2822         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2823         seq_printf(m, "#                       is not a '0' or '1')\n");
2824 }
2825
2826 static void show_snapshot_percpu_help(struct seq_file *m)
2827 {
2828         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2829 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2830         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2831         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2832 #else
2833         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2834         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2835 #endif
2836         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2837         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2838         seq_printf(m, "#                       is not a '0' or '1')\n");
2839 }
2840
2841 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2842 {
2843         if (iter->tr->allocated_snapshot)
2844                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2845         else
2846                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2847
2848         seq_printf(m, "# Snapshot commands:\n");
2849         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2850                 show_snapshot_main_help(m);
2851         else
2852                 show_snapshot_percpu_help(m);
2853 }
2854 #else
2855 /* Should never be called */
2856 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2857 #endif
2858
2859 static int s_show(struct seq_file *m, void *v)
2860 {
2861         struct trace_iterator *iter = v;
2862         int ret;
2863
2864         if (iter->ent == NULL) {
2865                 if (iter->tr) {
2866                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2867                         seq_puts(m, "#\n");
2868                         test_ftrace_alive(m);
2869                 }
2870                 if (iter->snapshot && trace_empty(iter))
2871                         print_snapshot_help(m, iter);
2872                 else if (iter->trace && iter->trace->print_header)
2873                         iter->trace->print_header(m);
2874                 else
2875                         trace_default_header(m);
2876
2877         } else if (iter->leftover) {
2878                 /*
2879                  * If we filled the seq_file buffer earlier, we
2880                  * want to just show it now.
2881                  */
2882                 ret = trace_print_seq(m, &iter->seq);
2883
2884                 /* ret should this time be zero, but you never know */
2885                 iter->leftover = ret;
2886
2887         } else {
2888                 print_trace_line(iter);
2889                 ret = trace_print_seq(m, &iter->seq);
2890                 /*
2891                  * If we overflow the seq_file buffer, then it will
2892                  * ask us for this data again at start up.
2893                  * Use that instead.
2894                  *  ret is 0 if seq_file write succeeded.
2895                  *        -1 otherwise.
2896                  */
2897                 iter->leftover = ret;
2898         }
2899
2900         return 0;
2901 }
2902
2903 /*
2904  * Should be used after trace_array_get(); trace_types_lock
2905  * ensures that i_cdev was already initialized.
2906  */
2907 static inline int tracing_get_cpu(struct inode *inode)
2908 {
2909         if (inode->i_cdev) /* See trace_create_cpu_file() */
2910                 return (long)inode->i_cdev - 1;
2911         return RING_BUFFER_ALL_CPUS;
2912 }
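/*
 * Illustrative note (an assumption drawn from the comment above):
 * trace_create_cpu_file() stores cpu + 1 in i_cdev, so the per_cpu file
 * for cpu 3 resolves to (long)i_cdev - 1 == 3 here, while files with a
 * NULL i_cdev map to RING_BUFFER_ALL_CPUS.
 */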
2913
2914 static const struct seq_operations tracer_seq_ops = {
2915         .start          = s_start,
2916         .next           = s_next,
2917         .stop           = s_stop,
2918         .show           = s_show,
2919 };
2920
2921 static struct trace_iterator *
2922 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2923 {
2924         struct trace_array *tr = inode->i_private;
2925         struct trace_iterator *iter;
2926         int cpu;
2927
2928         if (tracing_disabled)
2929                 return ERR_PTR(-ENODEV);
2930
2931         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2932         if (!iter)
2933                 return ERR_PTR(-ENOMEM);
2934
2935         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2936                                     GFP_KERNEL);
2937         if (!iter->buffer_iter)
2938                 goto release;
2939
2940         /*
2941          * We make a copy of the current tracer to avoid concurrent
2942          * changes on it while we are reading.
2943          */
2944         mutex_lock(&trace_types_lock);
2945         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2946         if (!iter->trace)
2947                 goto fail;
2948
2949         *iter->trace = *tr->current_trace;
2950
2951         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2952                 goto fail;
2953
2954         iter->tr = tr;
2955
2956 #ifdef CONFIG_TRACER_MAX_TRACE
2957         /* Currently only the top directory has a snapshot */
2958         if (tr->current_trace->print_max || snapshot)
2959                 iter->trace_buffer = &tr->max_buffer;
2960         else
2961 #endif
2962                 iter->trace_buffer = &tr->trace_buffer;
2963         iter->snapshot = snapshot;
2964         iter->pos = -1;
2965         iter->cpu_file = tracing_get_cpu(inode);
2966         mutex_init(&iter->mutex);
2967
2968         /* Notify the tracer early; before we stop tracing. */
2969         if (iter->trace && iter->trace->open)
2970                 iter->trace->open(iter);
2971
2972         /* Annotate start of buffers if we had overruns */
2973         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2974                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2975
2976         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2977         if (trace_clocks[tr->clock_id].in_ns)
2978                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2979
2980         /* stop the trace while dumping if we are not opening "snapshot" */
2981         if (!iter->snapshot)
2982                 tracing_stop_tr(tr);
2983
2984         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2985                 for_each_tracing_cpu(cpu) {
2986                         iter->buffer_iter[cpu] =
2987                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2988                 }
2989                 ring_buffer_read_prepare_sync();
2990                 for_each_tracing_cpu(cpu) {
2991                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2992                         tracing_iter_reset(iter, cpu);
2993                 }
2994         } else {
2995                 cpu = iter->cpu_file;
2996                 iter->buffer_iter[cpu] =
2997                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2998                 ring_buffer_read_prepare_sync();
2999                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3000                 tracing_iter_reset(iter, cpu);
3001         }
3002
3003         mutex_unlock(&trace_types_lock);
3004
3005         return iter;
3006
3007  fail:
3008         mutex_unlock(&trace_types_lock);
3009         kfree(iter->trace);
3010         kfree(iter->buffer_iter);
3011 release:
3012         seq_release_private(inode, file);
3013         return ERR_PTR(-ENOMEM);
3014 }
3015
3016 int tracing_open_generic(struct inode *inode, struct file *filp)
3017 {
3018         if (tracing_disabled)
3019                 return -ENODEV;
3020
3021         filp->private_data = inode->i_private;
3022         return 0;
3023 }
3024
3025 bool tracing_is_disabled(void)
3026 {
3027         return tracing_disabled ? true : false;
3028 }
3029
3030 /*
3031  * Open and update trace_array ref count.
3032  * Must have the current trace_array passed to it.
3033  */
3034 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3035 {
3036         struct trace_array *tr = inode->i_private;
3037
3038         if (tracing_disabled)
3039                 return -ENODEV;
3040
3041         if (trace_array_get(tr) < 0)
3042                 return -ENODEV;
3043
3044         filp->private_data = inode->i_private;
3045
3046         return 0;
3047 }
3048
3049 static int tracing_release(struct inode *inode, struct file *file)
3050 {
3051         struct trace_array *tr = inode->i_private;
3052         struct seq_file *m = file->private_data;
3053         struct trace_iterator *iter;
3054         int cpu;
3055
3056         if (!(file->f_mode & FMODE_READ)) {
3057                 trace_array_put(tr);
3058                 return 0;
3059         }
3060
3061         /* Writes do not use seq_file */
3062         iter = m->private;
3063         mutex_lock(&trace_types_lock);
3064
3065         for_each_tracing_cpu(cpu) {
3066                 if (iter->buffer_iter[cpu])
3067                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3068         }
3069
3070         if (iter->trace && iter->trace->close)
3071                 iter->trace->close(iter);
3072
3073         if (!iter->snapshot)
3074                 /* reenable tracing if it was previously enabled */
3075                 tracing_start_tr(tr);
3076
3077         __trace_array_put(tr);
3078
3079         mutex_unlock(&trace_types_lock);
3080
3081         mutex_destroy(&iter->mutex);
3082         free_cpumask_var(iter->started);
3083         kfree(iter->trace);
3084         kfree(iter->buffer_iter);
3085         seq_release_private(inode, file);
3086
3087         return 0;
3088 }
3089
3090 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3091 {
3092         struct trace_array *tr = inode->i_private;
3093
3094         trace_array_put(tr);
3095         return 0;
3096 }
3097
3098 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3099 {
3100         struct trace_array *tr = inode->i_private;
3101
3102         trace_array_put(tr);
3103
3104         return single_release(inode, file);
3105 }
3106
3107 static int tracing_open(struct inode *inode, struct file *file)
3108 {
3109         struct trace_array *tr = inode->i_private;
3110         struct trace_iterator *iter;
3111         int ret = 0;
3112
3113         if (trace_array_get(tr) < 0)
3114                 return -ENODEV;
3115
3116         /* If this file was open for write, then erase contents */
3117         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3118                 int cpu = tracing_get_cpu(inode);
3119
3120                 if (cpu == RING_BUFFER_ALL_CPUS)
3121                         tracing_reset_online_cpus(&tr->trace_buffer);
3122                 else
3123                         tracing_reset(&tr->trace_buffer, cpu);
3124         }
3125
3126         if (file->f_mode & FMODE_READ) {
3127                 iter = __tracing_open(inode, file, false);
3128                 if (IS_ERR(iter))
3129                         ret = PTR_ERR(iter);
3130                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3131                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3132         }
3133
3134         if (ret < 0)
3135                 trace_array_put(tr);
3136
3137         return ret;
3138 }
3139
3140 static void *
3141 t_next(struct seq_file *m, void *v, loff_t *pos)
3142 {
3143         struct tracer *t = v;
3144
3145         (*pos)++;
3146
3147         if (t)
3148                 t = t->next;
3149
3150         return t;
3151 }
3152
3153 static void *t_start(struct seq_file *m, loff_t *pos)
3154 {
3155         struct tracer *t;
3156         loff_t l = 0;
3157
3158         mutex_lock(&trace_types_lock);
3159         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3160                 ;
3161
3162         return t;
3163 }
3164
3165 static void t_stop(struct seq_file *m, void *p)
3166 {
3167         mutex_unlock(&trace_types_lock);
3168 }
3169
3170 static int t_show(struct seq_file *m, void *v)
3171 {
3172         struct tracer *t = v;
3173
3174         if (!t)
3175                 return 0;
3176
3177         seq_printf(m, "%s", t->name);
3178         if (t->next)
3179                 seq_putc(m, ' ');
3180         else
3181                 seq_putc(m, '\n');
3182
3183         return 0;
3184 }
3185
3186 static const struct seq_operations show_traces_seq_ops = {
3187         .start          = t_start,
3188         .next           = t_next,
3189         .stop           = t_stop,
3190         .show           = t_show,
3191 };
3192
3193 static int show_traces_open(struct inode *inode, struct file *file)
3194 {
3195         if (tracing_disabled)
3196                 return -ENODEV;
3197
3198         return seq_open(file, &show_traces_seq_ops);
3199 }
3200
3201 static ssize_t
3202 tracing_write_stub(struct file *filp, const char __user *ubuf,
3203                    size_t count, loff_t *ppos)
3204 {
3205         return count;
3206 }
3207
3208 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3209 {
3210         int ret;
3211
3212         if (file->f_mode & FMODE_READ)
3213                 ret = seq_lseek(file, offset, whence);
3214         else
3215                 file->f_pos = ret = 0;
3216
3217         return ret;
3218 }
3219
3220 static const struct file_operations tracing_fops = {
3221         .open           = tracing_open,
3222         .read           = seq_read,
3223         .write          = tracing_write_stub,
3224         .llseek         = tracing_lseek,
3225         .release        = tracing_release,
3226 };
3227
3228 static const struct file_operations show_traces_fops = {
3229         .open           = show_traces_open,
3230         .read           = seq_read,
3231         .release        = seq_release,
3232         .llseek         = seq_lseek,
3233 };
3234
3235 /*
3236  * The tracer itself will not take this lock, but still we want
3237  * to provide a consistent cpumask to user-space:
3238  */
3239 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3240
3241 /*
3242  * Temporary storage for the character representation of the
3243  * CPU bitmask (and one more byte for the newline):
3244  */
3245 static char mask_str[NR_CPUS + 1];
3246
3247 static ssize_t
3248 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3249                      size_t count, loff_t *ppos)
3250 {
3251         struct trace_array *tr = file_inode(filp)->i_private;
3252         int len;
3253
3254         mutex_lock(&tracing_cpumask_update_lock);
3255
3256         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3257         if (count - len < 2) {
3258                 count = -EINVAL;
3259                 goto out_err;
3260         }
3261         len += sprintf(mask_str + len, "\n");
3262         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3263
3264 out_err:
3265         mutex_unlock(&tracing_cpumask_update_lock);
3266
3267         return count;
3268 }
3269
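     /*
      * Write handler for the tracing_cpumask file: parse the new mask from
      * user-space and pause/resume per-cpu recording for every CPU whose
      * bit changes before committing the new mask.
      */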
3270 static ssize_t
3271 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3272                       size_t count, loff_t *ppos)
3273 {
3274         struct trace_array *tr = file_inode(filp)->i_private;
3275         cpumask_var_t tracing_cpumask_new;
3276         int err, cpu;
3277
3278         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3279                 return -ENOMEM;
3280
3281         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3282         if (err)
3283                 goto err_unlock;
3284
3285         mutex_lock(&tracing_cpumask_update_lock);
3286
3287         local_irq_disable();
3288         arch_spin_lock(&ftrace_max_lock);
3289         for_each_tracing_cpu(cpu) {
3290                 /*
3291                  * Increase/decrease the disabled counter if we are
3292                  * about to flip a bit in the cpumask:
3293                  */
3294                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3295                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3296                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3297                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3298                 }
3299                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3300                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3301                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3302                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3303                 }
3304         }
3305         arch_spin_unlock(&ftrace_max_lock);
3306         local_irq_enable();
3307
3308         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3309
3310         mutex_unlock(&tracing_cpumask_update_lock);
3311         free_cpumask_var(tracing_cpumask_new);
3312
3313         return count;
3314
3315 err_unlock:
3316         free_cpumask_var(tracing_cpumask_new);
3317
3318         return err;
3319 }
3320
3321 static const struct file_operations tracing_cpumask_fops = {
3322         .open           = tracing_open_generic_tr,
3323         .read           = tracing_cpumask_read,
3324         .write          = tracing_cpumask_write,
3325         .release        = tracing_release_generic_tr,
3326         .llseek         = generic_file_llseek,
3327 };
3328
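     /* List the core trace_options flags followed by the current tracer's private flags. */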
3329 static int tracing_trace_options_show(struct seq_file *m, void *v)
3330 {
3331         struct tracer_opt *trace_opts;
3332         struct trace_array *tr = m->private;
3333         u32 tracer_flags;
3334         int i;
3335
3336         mutex_lock(&trace_types_lock);
3337         tracer_flags = tr->current_trace->flags->val;
3338         trace_opts = tr->current_trace->flags->opts;
3339
3340         for (i = 0; trace_options[i]; i++) {
3341                 if (trace_flags & (1 << i))
3342                         seq_printf(m, "%s\n", trace_options[i]);
3343                 else
3344                         seq_printf(m, "no%s\n", trace_options[i]);
3345         }
3346
3347         for (i = 0; trace_opts[i].name; i++) {
3348                 if (tracer_flags & trace_opts[i].bit)
3349                         seq_printf(m, "%s\n", trace_opts[i].name);
3350                 else
3351                         seq_printf(m, "no%s\n", trace_opts[i].name);
3352         }
3353         mutex_unlock(&trace_types_lock);
3354
3355         return 0;
3356 }
3357
3358 static int __set_tracer_option(struct tracer *trace,
3359                                struct tracer_flags *tracer_flags,
3360                                struct tracer_opt *opts, int neg)
3361 {
3362         int ret;
3363
3364         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3365         if (ret)
3366                 return ret;
3367
3368         if (neg)
3369                 tracer_flags->val &= ~opts->bit;
3370         else
3371                 tracer_flags->val |= opts->bit;
3372         return 0;
3373 }
3374
3375 /* Try to assign a tracer specific option */
3376 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3377 {
3378         struct tracer_flags *tracer_flags = trace->flags;
3379         struct tracer_opt *opts = NULL;
3380         int i;
3381
3382         for (i = 0; tracer_flags->opts[i].name; i++) {
3383                 opts = &tracer_flags->opts[i];
3384
3385                 if (strcmp(cmp, opts->name) == 0)
3386                         return __set_tracer_option(trace, trace->flags,
3387                                                    opts, neg);
3388         }
3389
3390         return -EINVAL;
3391 }
3392
3393 /* Some tracers require overwrite to stay enabled */
3394 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3395 {
3396         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3397                 return -1;
3398
3399         return 0;
3400 }
3401
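     /*
      * Set or clear a core trace flag, letting the current tracer veto the
      * change and propagating side effects (cmdline recording, buffer
      * overwrite mode, trace_printk).
      */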
3402 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3403 {
3404         /* do nothing if flag is already set */
3405         if (!!(trace_flags & mask) == !!enabled)
3406                 return 0;
3407
3408         /* Give the tracer a chance to approve the change */
3409         if (tr->current_trace->flag_changed)
3410                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3411                         return -EINVAL;
3412
3413         if (enabled)
3414                 trace_flags |= mask;
3415         else
3416                 trace_flags &= ~mask;
3417
3418         if (mask == TRACE_ITER_RECORD_CMD)
3419                 trace_event_enable_cmd_record(enabled);
3420
3421         if (mask == TRACE_ITER_OVERWRITE) {
3422                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3423 #ifdef CONFIG_TRACER_MAX_TRACE
3424                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3425 #endif
3426         }
3427
3428         if (mask == TRACE_ITER_PRINTK)
3429                 trace_printk_start_stop_comm(enabled);
3430
3431         return 0;
3432 }
3433
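     /*
      * Apply a single option string (optionally prefixed with "no" to negate
      * it): try the core trace_options first, then the current tracer's own
      * options.
      */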
3434 static int trace_set_options(struct trace_array *tr, char *option)
3435 {
3436         char *cmp;
3437         int neg = 0;
3438         int ret = -ENODEV;
3439         int i;
3440
3441         cmp = strstrip(option);
3442
3443         if (strncmp(cmp, "no", 2) == 0) {
3444                 neg = 1;
3445                 cmp += 2;
3446         }
3447
3448         mutex_lock(&trace_types_lock);
3449
3450         for (i = 0; trace_options[i]; i++) {
3451                 if (strcmp(cmp, trace_options[i]) == 0) {
3452                         ret = set_tracer_flag(tr, 1 << i, !neg);
3453                         break;
3454                 }
3455         }
3456
3457         /* If no option could be set, test the specific tracer options */
3458         if (!trace_options[i])
3459                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3460
3461         mutex_unlock(&trace_types_lock);
3462
3463         return ret;
3464 }
3465
3466 static ssize_t
3467 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3468                         size_t cnt, loff_t *ppos)
3469 {
3470         struct seq_file *m = filp->private_data;
3471         struct trace_array *tr = m->private;
3472         char buf[64];
3473         int ret;
3474
3475         if (cnt >= sizeof(buf))
3476                 return -EINVAL;
3477
3478         if (copy_from_user(&buf, ubuf, cnt))
3479                 return -EFAULT;
3480
3481         buf[cnt] = 0;
3482
3483         ret = trace_set_options(tr, buf);
3484         if (ret < 0)
3485                 return ret;
3486
3487         *ppos += cnt;
3488
3489         return cnt;
3490 }
3491
3492 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3493 {
3494         struct trace_array *tr = inode->i_private;
3495         int ret;
3496
3497         if (tracing_disabled)
3498                 return -ENODEV;
3499
3500         if (trace_array_get(tr) < 0)
3501                 return -ENODEV;
3502
3503         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3504         if (ret < 0)
3505                 trace_array_put(tr);
3506
3507         return ret;
3508 }
3509
3510 static const struct file_operations tracing_iter_fops = {
3511         .open           = tracing_trace_options_open,
3512         .read           = seq_read,
3513         .llseek         = seq_lseek,
3514         .release        = tracing_single_release_tr,
3515         .write          = tracing_trace_options_write,
3516 };
3517
3518 static const char readme_msg[] =
3519         "tracing mini-HOWTO:\n\n"
3520         "# echo 0 > tracing_on : quick way to disable tracing\n"
3521         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3522         " Important files:\n"
3523         "  trace\t\t\t- The static contents of the buffer\n"
3524         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3525         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3526         "  current_tracer\t- function and latency tracers\n"
3527         "  available_tracers\t- list of configured tracers for current_tracer\n"
3528         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3529         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3530         "  trace_clock\t\t- change the clock used to order events\n"
3531         "       local:   Per cpu clock but may not be synced across CPUs\n"
3532         "      global:   Synced across CPUs but slows tracing down.\n"
3533         "     counter:   Not a clock, but just an increment\n"
3534         "      uptime:   Jiffy counter from time of boot\n"
3535         "        perf:   Same clock that perf events use\n"
3536 #ifdef CONFIG_X86_64
3537         "     x86-tsc:   TSC cycle counter\n"
3538 #endif
3539         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3540         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3541         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3542         "\t\t\t  Remove sub-buffer with rmdir\n"
3543         "  trace_options\t\t- Set format or modify how tracing happens\n"
3544         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
3545         "\t\t\t  option name\n"
3546 #ifdef CONFIG_DYNAMIC_FTRACE
3547         "\n  available_filter_functions - list of functions that can be filtered on\n"
3548         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3549         "\t\t\t  functions\n"
3550         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3551         "\t     modules: Can select a group via module\n"
3552         "\t      Format: :mod:<module-name>\n"
3553         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3554         "\t    triggers: a command to perform when function is hit\n"
3555         "\t      Format: <function>:<trigger>[:count]\n"
3556         "\t     trigger: traceon, traceoff\n"
3557         "\t\t      enable_event:<system>:<event>\n"
3558         "\t\t      disable_event:<system>:<event>\n"
3559 #ifdef CONFIG_STACKTRACE
3560         "\t\t      stacktrace\n"
3561 #endif
3562 #ifdef CONFIG_TRACER_SNAPSHOT
3563         "\t\t      snapshot\n"
3564 #endif
3565         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3566         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3567         "\t     The first one will disable tracing every time do_fault is hit\n"
3568         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3569         "\t       The first time do_trap is hit and it disables tracing, the\n"
3570         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3571         "\t       the counter will not decrement. It only decrements when the\n"
3572         "\t       trigger did work\n"
3573         "\t     To remove trigger without count:\n"
3574         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3575         "\t     To remove trigger with a count:\n"
3576         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3577         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3578         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3579         "\t    modules: Can select a group via module command :mod:\n"
3580         "\t    Does not accept triggers\n"
3581 #endif /* CONFIG_DYNAMIC_FTRACE */
3582 #ifdef CONFIG_FUNCTION_TRACER
3583         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3584         "\t\t    (function)\n"
3585 #endif
3586 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3587         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3588         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3589 #endif
3590 #ifdef CONFIG_TRACER_SNAPSHOT
3591         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3592         "\t\t\t  snapshot buffer. Read the contents for more\n"
3593         "\t\t\t  information\n"
3594 #endif
3595 #ifdef CONFIG_STACK_TRACER
3596         "  stack_trace\t\t- Shows the max stack trace when active\n"
3597         "  stack_max_size\t- Shows current max stack size that was traced\n"
3598         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3599         "\t\t\t  new trace)\n"
3600 #ifdef CONFIG_DYNAMIC_FTRACE
3601         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3602         "\t\t\t  traces\n"
3603 #endif
3604 #endif /* CONFIG_STACK_TRACER */
3605         "  events/\t\t- Directory containing all trace event subsystems:\n"
3606         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3607         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3608         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3609         "\t\t\t  events\n"
3610         "      filter\t\t- If set, only events passing filter are traced\n"
3611         "  events/<system>/<event>/\t- Directory containing control files for\n"
3612         "\t\t\t  <event>:\n"
3613         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3614         "      filter\t\t- If set, only events passing filter are traced\n"
3615         "      trigger\t\t- If set, a command to perform when event is hit\n"
3616         "\t    Format: <trigger>[:count][if <filter>]\n"
3617         "\t   trigger: traceon, traceoff\n"
3618         "\t            enable_event:<system>:<event>\n"
3619         "\t            disable_event:<system>:<event>\n"
3620 #ifdef CONFIG_STACKTRACE
3621         "\t\t    stacktrace\n"
3622 #endif
3623 #ifdef CONFIG_TRACER_SNAPSHOT
3624         "\t\t    snapshot\n"
3625 #endif
3626         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3627         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3628         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3629         "\t                  events/block/block_unplug/trigger\n"
3630         "\t   The first disables tracing every time block_unplug is hit.\n"
3631         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3632         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3633         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3634         "\t   Like function triggers, the counter is only decremented if it\n"
3635         "\t    enabled or disabled tracing.\n"
3636         "\t   To remove a trigger without a count:\n"
3637         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3638         "\t   To remove a trigger with a count:\n"
3639         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3640         "\t   The filter, if any, may be omitted when removing a trigger.\n"
3641 ;
3642
3643 static ssize_t
3644 tracing_readme_read(struct file *filp, char __user *ubuf,
3645                        size_t cnt, loff_t *ppos)
3646 {
3647         return simple_read_from_buffer(ubuf, cnt, ppos,
3648                                         readme_msg, strlen(readme_msg));
3649 }
3650
3651 static const struct file_operations tracing_readme_fops = {
3652         .open           = tracing_open_generic,
3653         .read           = tracing_readme_read,
3654         .llseek         = generic_file_llseek,
3655 };
3656
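     /*
      * Read handler for the saved_cmdlines file: dump the cached pid -> comm
      * mappings, one "<pid> <comm>" pair per line.
      */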
3657 static ssize_t
3658 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3659                                 size_t cnt, loff_t *ppos)
3660 {
3661         char *buf_comm;
3662         char *file_buf;
3663         char *buf;
3664         int len = 0;
3665         int pid;
3666         int i;
3667
3668         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3669         if (!file_buf)
3670                 return -ENOMEM;
3671
3672         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3673         if (!buf_comm) {
3674                 kfree(file_buf);
3675                 return -ENOMEM;
3676         }
3677
3678         buf = file_buf;
3679
3680         for (i = 0; i < SAVED_CMDLINES; i++) {
3681                 int r;
3682
3683                 pid = map_cmdline_to_pid[i];
3684                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3685                         continue;
3686
3687                 trace_find_cmdline(pid, buf_comm);
3688                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3689                 buf += r;
3690                 len += r;
3691         }
3692
3693         len = simple_read_from_buffer(ubuf, cnt, ppos,
3694                                       file_buf, len);
3695
3696         kfree(file_buf);
3697         kfree(buf_comm);
3698
3699         return len;
3700 }
3701
3702 static const struct file_operations tracing_saved_cmdlines_fops = {
3703     .open       = tracing_open_generic,
3704     .read       = tracing_saved_cmdlines_read,
3705     .llseek     = generic_file_llseek,
3706 };
3707
3708 static ssize_t
3709 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3710                        size_t cnt, loff_t *ppos)
3711 {
3712         struct trace_array *tr = filp->private_data;
3713         char buf[MAX_TRACER_SIZE+2];
3714         int r;
3715
3716         mutex_lock(&trace_types_lock);
3717         r = sprintf(buf, "%s\n", tr->current_trace->name);
3718         mutex_unlock(&trace_types_lock);
3719
3720         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3721 }
3722
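     /* Reset the online-cpu buffers and run the tracer's init callback. */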
3723 int tracer_init(struct tracer *t, struct trace_array *tr)
3724 {
3725         tracing_reset_online_cpus(&tr->trace_buffer);
3726         return t->init(tr);
3727 }
3728
3729 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3730 {
3731         int cpu;
3732
3733         for_each_tracing_cpu(cpu)
3734                 per_cpu_ptr(buf->data, cpu)->entries = val;
3735 }
3736
3737 #ifdef CONFIG_TRACER_MAX_TRACE
3738 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3739 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3740                                         struct trace_buffer *size_buf, int cpu_id)
3741 {
3742         int cpu, ret = 0;
3743
3744         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3745                 for_each_tracing_cpu(cpu) {
3746                         ret = ring_buffer_resize(trace_buf->buffer,
3747                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3748                         if (ret < 0)
3749                                 break;
3750                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3751                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3752                 }
3753         } else {
3754                 ret = ring_buffer_resize(trace_buf->buffer,
3755                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3756                 if (ret == 0)
3757                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3758                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3759         }
3760
3761         return ret;
3762 }
3763 #endif /* CONFIG_TRACER_MAX_TRACE */
3764
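     /*
      * Resize the main trace buffer (and the max/snapshot buffer when the
      * current tracer uses it) for @cpu, or for all CPUs when @cpu is
      * RING_BUFFER_ALL_CPUS.
      */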
3765 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3766                                         unsigned long size, int cpu)
3767 {
3768         int ret;
3769
3770         /*
3771          * If kernel or user changes the size of the ring buffer
3772          * we use the size that was given, and we can forget about
3773          * expanding it later.
3774          */
3775         ring_buffer_expanded = true;
3776
3777         /* May be called before buffers are initialized */
3778         if (!tr->trace_buffer.buffer)
3779                 return 0;
3780
3781         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3782         if (ret < 0)
3783                 return ret;
3784
3785 #ifdef CONFIG_TRACER_MAX_TRACE
3786         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3787             !tr->current_trace->use_max_tr)
3788                 goto out;
3789
3790         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3791         if (ret < 0) {
3792                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3793                                                      &tr->trace_buffer, cpu);
3794                 if (r < 0) {
3795                         /*
3796                          * AARGH! We are left with different
3797                          * size max buffer!!!!
3798                          * The max buffer is our "snapshot" buffer.
3799                          * When a tracer needs a snapshot (one of the
3800                          * latency tracers), it swaps the max buffer
3801                          * with the saved snapshot. We succeeded in updating
3802                          * the size of the main buffer, but failed to update
3803                          * the size of the max buffer. Then, when we tried to
3804                          * reset the main buffer to the original size, that
3805                          * failed too. This is very unlikely to
3806                          * happen, but if it does, warn and kill all
3807                          * tracing.
3808                          */
3809                         WARN_ON(1);
3810                         tracing_disabled = 1;
3811                 }
3812                 return ret;
3813         }
3814
3815         if (cpu == RING_BUFFER_ALL_CPUS)
3816                 set_buffer_entries(&tr->max_buffer, size);
3817         else
3818                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3819
3820  out:
3821 #endif /* CONFIG_TRACER_MAX_TRACE */
3822
3823         if (cpu == RING_BUFFER_ALL_CPUS)
3824                 set_buffer_entries(&tr->trace_buffer, size);
3825         else
3826                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3827
3828         return ret;
3829 }
3830
3831 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3832                                           unsigned long size, int cpu_id)
3833 {
3834         int ret = size;
3835
3836         mutex_lock(&trace_types_lock);
3837
3838         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3839                 /* make sure this cpu is enabled in the mask */
3840                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3841                         ret = -EINVAL;
3842                         goto out;
3843                 }
3844         }
3845
3846         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3847         if (ret < 0)
3848                 ret = -ENOMEM;
3849
3850 out:
3851         mutex_unlock(&trace_types_lock);
3852
3853         return ret;
3854 }
3855
3856
3857 /**
3858  * tracing_update_buffers - used by tracing facility to expand ring buffers
3859  *
3860  * To save memory when tracing is configured into the kernel but never
3861  * used, the ring buffers start out at a minimum size. Once a user
3862  * starts to use the tracing facility, they need to grow to their
3863  * default size.
3864  *
3865  * This function is to be called when a tracer is about to be used.
3866  */
3867 int tracing_update_buffers(void)
3868 {
3869         int ret = 0;
3870
3871         mutex_lock(&trace_types_lock);
3872         if (!ring_buffer_expanded)
3873                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3874                                                 RING_BUFFER_ALL_CPUS);
3875         mutex_unlock(&trace_types_lock);
3876
3877         return ret;
3878 }
3879
3880 struct trace_option_dentry;
3881
3882 static struct trace_option_dentry *
3883 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3884
3885 static void
3886 destroy_trace_option_files(struct trace_option_dentry *topts);
3887
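     /*
      * Switch the current tracer: shut down the old one (freeing its snapshot
      * buffer if the new tracer does not need it), rebuild the tracer-specific
      * option files, and initialize the new tracer.
      */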
3888 static int tracing_set_tracer(const char *buf)
3889 {
3890         static struct trace_option_dentry *topts;
3891         struct trace_array *tr = &global_trace;
3892         struct tracer *t;
3893 #ifdef CONFIG_TRACER_MAX_TRACE
3894         bool had_max_tr;
3895 #endif
3896         int ret = 0;
3897
3898         mutex_lock(&trace_types_lock);
3899
3900         if (!ring_buffer_expanded) {
3901                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3902                                                 RING_BUFFER_ALL_CPUS);
3903                 if (ret < 0)
3904                         goto out;
3905                 ret = 0;
3906         }
3907
3908         for (t = trace_types; t; t = t->next) {
3909                 if (strcmp(t->name, buf) == 0)
3910                         break;
3911         }
3912         if (!t) {
3913                 ret = -EINVAL;
3914                 goto out;
3915         }
3916         if (t == tr->current_trace)
3917                 goto out;
3918
3919         trace_branch_disable();
3920
3921         tr->current_trace->enabled = false;
3922
3923         if (tr->current_trace->reset)
3924                 tr->current_trace->reset(tr);
3925
3926         /* Current trace needs to be nop_trace before synchronize_sched */
3927         tr->current_trace = &nop_trace;
3928
3929 #ifdef CONFIG_TRACER_MAX_TRACE
3930         had_max_tr = tr->allocated_snapshot;
3931
3932         if (had_max_tr && !t->use_max_tr) {
3933                 /*
3934                  * We need to make sure that the update_max_tr sees that
3935                  * current_trace changed to nop_trace to keep it from
3936                  * swapping the buffers after we resize it.
3937                  * update_max_tr() is called with interrupts disabled,
3938                  * so a synchronize_sched() is sufficient.
3939                  */
3940                 synchronize_sched();
3941                 free_snapshot(tr);
3942         }
3943 #endif
3944         destroy_trace_option_files(topts);
3945
3946         topts = create_trace_option_files(tr, t);
3947
3948 #ifdef CONFIG_TRACER_MAX_TRACE
3949         if (t->use_max_tr && !had_max_tr) {
3950                 ret = alloc_snapshot(tr);
3951                 if (ret < 0)
3952                         goto out;
3953         }
3954 #endif
3955
3956         if (t->init) {
3957                 ret = tracer_init(t, tr);
3958                 if (ret)
3959                         goto out;
3960         }
3961
3962         tr->current_trace = t;
3963         tr->current_trace->enabled = true;
3964         trace_branch_enable(tr);
3965  out:
3966         mutex_unlock(&trace_types_lock);
3967
3968         return ret;
3969 }
3970
3971 static ssize_t
3972 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3973                         size_t cnt, loff_t *ppos)
3974 {
3975         char buf[MAX_TRACER_SIZE+1];
3976         int i;
3977         size_t ret;
3978         int err;
3979
3980         ret = cnt;
3981
3982         if (cnt > MAX_TRACER_SIZE)
3983                 cnt = MAX_TRACER_SIZE;
3984
3985         if (copy_from_user(&buf, ubuf, cnt))
3986                 return -EFAULT;
3987
3988         buf[cnt] = 0;
3989
3990         /* strip ending whitespace. */
3991         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3992                 buf[i] = 0;
3993
3994         err = tracing_set_tracer(buf);
3995         if (err)
3996                 return err;
3997
3998         *ppos += ret;
3999
4000         return ret;
4001 }
4002
4003 static ssize_t
4004 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4005                      size_t cnt, loff_t *ppos)
4006 {
4007         unsigned long *ptr = filp->private_data;
4008         char buf[64];
4009         int r;
4010
4011         r = snprintf(buf, sizeof(buf), "%ld\n",
4012                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4013         if (r > sizeof(buf))
4014                 r = sizeof(buf);
4015         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4016 }
4017
4018 static ssize_t
4019 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4020                       size_t cnt, loff_t *ppos)
4021 {
4022         unsigned long *ptr = filp->private_data;
4023         unsigned long val;
4024         int ret;
4025
4026         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4027         if (ret)
4028                 return ret;
4029
4030         *ptr = val * 1000;
4031
4032         return cnt;
4033 }
4034
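     /*
      * Open handler for trace_pipe: allocate a trace_iterator, with a private
      * copy of the current tracer, for a consuming reader.
      */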
4035 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4036 {
4037         struct trace_array *tr = inode->i_private;
4038         struct trace_iterator *iter;
4039         int ret = 0;
4040
4041         if (tracing_disabled)
4042                 return -ENODEV;
4043
4044         if (trace_array_get(tr) < 0)
4045                 return -ENODEV;
4046
4047         mutex_lock(&trace_types_lock);
4048
4049         /* create a buffer to store the information to pass to userspace */
4050         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4051         if (!iter) {
4052                 ret = -ENOMEM;
4053                 __trace_array_put(tr);
4054                 goto out;
4055         }
4056
4057         /*
4058          * We make a copy of the current tracer to avoid concurrent
4059          * changes on it while we are reading.
4060          */
4061         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4062         if (!iter->trace) {
4063                 ret = -ENOMEM;
4064                 goto fail;
4065         }
4066         *iter->trace = *tr->current_trace;
4067
4068         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4069                 ret = -ENOMEM;
4070                 goto fail;
4071         }
4072
4073         /* trace pipe does not show start of buffer */
4074         cpumask_setall(iter->started);
4075
4076         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4077                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4078
4079         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4080         if (trace_clocks[tr->clock_id].in_ns)
4081                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4082
4083         iter->tr = tr;
4084         iter->trace_buffer = &tr->trace_buffer;
4085         iter->cpu_file = tracing_get_cpu(inode);
4086         mutex_init(&iter->mutex);
4087         filp->private_data = iter;
4088
4089         if (iter->trace->pipe_open)
4090                 iter->trace->pipe_open(iter);
4091
4092         nonseekable_open(inode, filp);
4093 out:
4094         mutex_unlock(&trace_types_lock);
4095         return ret;
4096
4097 fail:
4098         kfree(iter->trace);
4099         kfree(iter);
4100         __trace_array_put(tr);
4101         mutex_unlock(&trace_types_lock);
4102         return ret;
4103 }
4104
4105 static int tracing_release_pipe(struct inode *inode, struct file *file)
4106 {
4107         struct trace_iterator *iter = file->private_data;
4108         struct trace_array *tr = inode->i_private;
4109
4110         mutex_lock(&trace_types_lock);
4111
4112         if (iter->trace->pipe_close)
4113                 iter->trace->pipe_close(iter);
4114
4115         mutex_unlock(&trace_types_lock);
4116
4117         free_cpumask_var(iter->started);
4118         mutex_destroy(&iter->mutex);
4119         kfree(iter->trace);
4120         kfree(iter);
4121
4122         trace_array_put(tr);
4123
4124         return 0;
4125 }
4126
4127 static unsigned int
4128 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4129 {
4130         /* Iterators are static; they should be either filled or empty */
4131         if (trace_buffer_iter(iter, iter->cpu_file))
4132                 return POLLIN | POLLRDNORM;
4133
4134         if (trace_flags & TRACE_ITER_BLOCK)
4135                 /*
4136                  * Always select as readable when in blocking mode
4137                  */
4138                 return POLLIN | POLLRDNORM;
4139         else
4140                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4141                                              filp, poll_table);
4142 }
4143
4144 static unsigned int
4145 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4146 {
4147         struct trace_iterator *iter = filp->private_data;
4148
4149         return trace_poll(iter, filp, poll_table);
4150 }
4151
4152 /*
4153  * This is a makeshift waitqueue.
4154  * A tracer might use this callback in some rare cases:
4155  *
4156  *  1) the current tracer might hold the runqueue lock when it wakes up
4157  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4158  *  2) the function tracers trace all functions, and we don't want
4159  *     the overhead of calling wake_up and friends
4160  *     (and tracing them too)
4161  *
4162  *     Either way, this is a very primitive wakeup.
4163  */
4164 void poll_wait_pipe(struct trace_iterator *iter)
4165 {
4166         set_current_state(TASK_INTERRUPTIBLE);
4167         /* sleep for 100 msecs, and try again. */
4168         schedule_timeout(HZ / 10);
4169 }
4170
4171 /* Must be called with trace_types_lock mutex held. */
4172 static int tracing_wait_pipe(struct file *filp)
4173 {
4174         struct trace_iterator *iter = filp->private_data;
4175
4176         while (trace_empty(iter)) {
4177
4178                 if ((filp->f_flags & O_NONBLOCK)) {
4179                         return -EAGAIN;
4180                 }
4181
4182                 mutex_unlock(&iter->mutex);
4183
4184                 iter->trace->wait_pipe(iter);
4185
4186                 mutex_lock(&iter->mutex);
4187
4188                 if (signal_pending(current))
4189                         return -EINTR;
4190
4191                 /*
4192                  * We only stop blocking once we have read something and
4193                  * tracing has been disabled. If tracing is disabled but we
4194                  * have never read anything, keep blocking; this allows a user
4195                  * to cat this file and then enable tracing. Once we have read
4196                  * something, we give an EOF when tracing is disabled again.
4197                  *
4198                  * iter->pos will be 0 if we haven't read anything.
4199                  */
4200                 if (!tracing_is_on() && iter->pos)
4201                         break;
4202         }
4203
4204         return 1;
4205 }
4206
4207 /*
4208  * Consumer reader.
4209  */
4210 static ssize_t
4211 tracing_read_pipe(struct file *filp, char __user *ubuf,
4212                   size_t cnt, loff_t *ppos)
4213 {
4214         struct trace_iterator *iter = filp->private_data;
4215         struct trace_array *tr = iter->tr;
4216         ssize_t sret;
4217
4218         /* return any leftover data */
4219         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4220         if (sret != -EBUSY)
4221                 return sret;
4222
4223         trace_seq_init(&iter->seq);
4224
4225         /* copy the tracer to avoid using a global lock all around */
4226         mutex_lock(&trace_types_lock);
4227         if (unlikely(iter->trace->name != tr->current_trace->name))
4228                 *iter->trace = *tr->current_trace;
4229         mutex_unlock(&trace_types_lock);
4230
4231         /*
4232          * Avoid more than one consumer on a single file descriptor
4233          * This is just a matter of traces coherency, the ring buffer itself
4234          * is protected.
4235          */
4236         mutex_lock(&iter->mutex);
4237         if (iter->trace->read) {
4238                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4239                 if (sret)
4240                         goto out;
4241         }
4242
4243 waitagain:
4244         sret = tracing_wait_pipe(filp);
4245         if (sret <= 0)
4246                 goto out;
4247
4248         /* stop when tracing is finished */
4249         if (trace_empty(iter)) {
4250                 sret = 0;
4251                 goto out;
4252         }
4253
4254         if (cnt >= PAGE_SIZE)
4255                 cnt = PAGE_SIZE - 1;
4256
4257         /* reset all but tr, trace, and overruns */
4258         memset(&iter->seq, 0,
4259                sizeof(struct trace_iterator) -
4260                offsetof(struct trace_iterator, seq));
4261         cpumask_clear(iter->started);
4262         iter->pos = -1;
4263
4264         trace_event_read_lock();
4265         trace_access_lock(iter->cpu_file);
4266         while (trace_find_next_entry_inc(iter) != NULL) {
4267                 enum print_line_t ret;
4268                 int len = iter->seq.len;
4269
4270                 ret = print_trace_line(iter);
4271                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4272                         /* don't print partial lines */
4273                         iter->seq.len = len;
4274                         break;
4275                 }
4276                 if (ret != TRACE_TYPE_NO_CONSUME)
4277                         trace_consume(iter);
4278
4279                 if (iter->seq.len >= cnt)
4280                         break;
4281
4282                 /*
4283                  * The full flag being set means we hit the trace_seq buffer
4284                  * size and should have left via the partial-output condition
4285                  * above. If we get here, a trace_seq_* function is being misused.
4286                  */
4287                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4288                           iter->ent->type);
4289         }
4290         trace_access_unlock(iter->cpu_file);
4291         trace_event_read_unlock();
4292
4293         /* Now copy what we have to the user */
4294         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4295         if (iter->seq.readpos >= iter->seq.len)
4296                 trace_seq_init(&iter->seq);
4297
4298         /*
4299          * If there was nothing to send to user, in spite of consuming trace
4300          * entries, go back to wait for more entries.
4301          */
4302         if (sret == -EBUSY)
4303                 goto waitagain;
4304
4305 out:
4306         mutex_unlock(&iter->mutex);
4307
4308         return sret;
4309 }
4310
4311 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4312                                      unsigned int idx)
4313 {
4314         __free_page(spd->pages[idx]);
4315 }
4316
4317 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4318         .can_merge              = 0,
4319         .map                    = generic_pipe_buf_map,
4320         .unmap                  = generic_pipe_buf_unmap,
4321         .confirm                = generic_pipe_buf_confirm,
4322         .release                = generic_pipe_buf_release,
4323         .steal                  = generic_pipe_buf_steal,
4324         .get                    = generic_pipe_buf_get,
4325 };
4326
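     /*
      * Format up to @rem bytes of trace output into iter->seq (at most one
      * page worth), consuming entries as they are printed; returns how much
      * of @rem is left.
      */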
4327 static size_t
4328 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4329 {
4330         size_t count;
4331         int ret;
4332
4333         /* Seq buffer is page-sized, exactly what we need. */
4334         for (;;) {
4335                 count = iter->seq.len;
4336                 ret = print_trace_line(iter);
4337                 count = iter->seq.len - count;
4338                 if (rem < count) {
4339                         rem = 0;
4340                         iter->seq.len -= count;
4341                         break;
4342                 }
4343                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4344                         iter->seq.len -= count;
4345                         break;
4346                 }
4347
4348                 if (ret != TRACE_TYPE_NO_CONSUME)
4349                         trace_consume(iter);
4350                 rem -= count;
4351                 if (!trace_find_next_entry_inc(iter))   {
4352                         rem = 0;
4353                         iter->ent = NULL;
4354                         break;
4355                 }
4356         }
4357
4358         return rem;
4359 }
4360
4361 static ssize_t tracing_splice_read_pipe(struct file *filp,
4362                                         loff_t *ppos,
4363                                         struct pipe_inode_info *pipe,
4364                                         size_t len,
4365                                         unsigned int flags)
4366 {
4367         struct page *pages_def[PIPE_DEF_BUFFERS];
4368         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4369         struct trace_iterator *iter = filp->private_data;
4370         struct splice_pipe_desc spd = {
4371                 .pages          = pages_def,
4372                 .partial        = partial_def,
4373                 .nr_pages       = 0, /* This gets updated below. */
4374                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4375                 .flags          = flags,
4376                 .ops            = &tracing_pipe_buf_ops,
4377                 .spd_release    = tracing_spd_release_pipe,
4378         };
4379         struct trace_array *tr = iter->tr;
4380         ssize_t ret;
4381         size_t rem;
4382         unsigned int i;
4383
4384         if (splice_grow_spd(pipe, &spd))
4385                 return -ENOMEM;
4386
4387         /* copy the tracer to avoid using a global lock all around */
4388         mutex_lock(&trace_types_lock);
4389         if (unlikely(iter->trace->name != tr->current_trace->name))
4390                 *iter->trace = *tr->current_trace;
4391         mutex_unlock(&trace_types_lock);
4392
4393         mutex_lock(&iter->mutex);
4394
4395         if (iter->trace->splice_read) {
4396                 ret = iter->trace->splice_read(iter, filp,
4397                                                ppos, pipe, len, flags);
4398                 if (ret)
4399                         goto out_err;
4400         }
4401
4402         ret = tracing_wait_pipe(filp);
4403         if (ret <= 0)
4404                 goto out_err;
4405
4406         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4407                 ret = -EFAULT;
4408                 goto out_err;
4409         }
4410
4411         trace_event_read_lock();
4412         trace_access_lock(iter->cpu_file);
4413
4414         /* Fill as many pages as possible. */
4415         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4416                 spd.pages[i] = alloc_page(GFP_KERNEL);
4417                 if (!spd.pages[i])
4418                         break;
4419
4420                 rem = tracing_fill_pipe_page(rem, iter);
4421
4422                 /* Copy the data into the page, so we can start over. */
4423                 ret = trace_seq_to_buffer(&iter->seq,
4424                                           page_address(spd.pages[i]),
4425                                           iter->seq.len);
4426                 if (ret < 0) {
4427                         __free_page(spd.pages[i]);
4428                         break;
4429                 }
4430                 spd.partial[i].offset = 0;
4431                 spd.partial[i].len = iter->seq.len;
4432
4433                 trace_seq_init(&iter->seq);
4434         }
4435
4436         trace_access_unlock(iter->cpu_file);
4437         trace_event_read_unlock();
4438         mutex_unlock(&iter->mutex);
4439
4440         spd.nr_pages = i;
4441
4442         ret = splice_to_pipe(pipe, &spd);
4443 out:
4444         splice_shrink_spd(&spd);
4445         return ret;
4446
4447 out_err:
4448         mutex_unlock(&iter->mutex);
4449         goto out;
4450 }
4451
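     /*
      * Read handler for buffer_size_kb: report the per-cpu ring buffer size
      * in KB, or "X" when the per-cpu sizes differ.
      */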
4452 static ssize_t
4453 tracing_entries_read(struct file *filp, char __user *ubuf,
4454                      size_t cnt, loff_t *ppos)
4455 {
4456         struct inode *inode = file_inode(filp);
4457         struct trace_array *tr = inode->i_private;
4458         int cpu = tracing_get_cpu(inode);
4459         char buf[64];
4460         int r = 0;
4461         ssize_t ret;
4462
4463         mutex_lock(&trace_types_lock);
4464
4465         if (cpu == RING_BUFFER_ALL_CPUS) {
4466                 int cpu, buf_size_same;
4467                 unsigned long size;
4468
4469                 size = 0;
4470                 buf_size_same = 1;
4471                 /* check if all cpu sizes are the same */
4472                 for_each_tracing_cpu(cpu) {
4473                         /* fill in the size from first enabled cpu */
4474                         if (size == 0)
4475                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4476                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4477                                 buf_size_same = 0;
4478                                 break;
4479                         }
4480                 }
4481
4482                 if (buf_size_same) {
4483                         if (!ring_buffer_expanded)
4484                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4485                                             size >> 10,
4486                                             trace_buf_size >> 10);
4487                         else
4488                                 r = sprintf(buf, "%lu\n", size >> 10);
4489                 } else
4490                         r = sprintf(buf, "X\n");
4491         } else
4492                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4493
4494         mutex_unlock(&trace_types_lock);
4495
4496         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4497         return ret;
4498 }
4499
4500 static ssize_t
4501 tracing_entries_write(struct file *filp, const char __user *ubuf,
4502                       size_t cnt, loff_t *ppos)
4503 {
4504         struct inode *inode = file_inode(filp);
4505         struct trace_array *tr = inode->i_private;
4506         unsigned long val;
4507         int ret;
4508
4509         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4510         if (ret)
4511                 return ret;
4512
4513         /* must have at least 1 entry */
4514         if (!val)
4515                 return -EINVAL;
4516
4517         /* value is in KB */
4518         val <<= 10;
4519         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4520         if (ret < 0)
4521                 return ret;
4522
4523         *ppos += cnt;
4524
4525         return cnt;
4526 }
4527
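     /* Read handler for buffer_total_size_kb: the sum of all per-cpu buffer sizes, in KB. */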
4528 static ssize_t
4529 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4530                                 size_t cnt, loff_t *ppos)
4531 {
4532         struct trace_array *tr = filp->private_data;
4533         char buf[64];
4534         int r, cpu;
4535         unsigned long size = 0, expanded_size = 0;
4536
4537         mutex_lock(&trace_types_lock);
4538         for_each_tracing_cpu(cpu) {
4539                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4540                 if (!ring_buffer_expanded)
4541                         expanded_size += trace_buf_size >> 10;
4542         }
4543         if (ring_buffer_expanded)
4544                 r = sprintf(buf, "%lu\n", size);
4545         else
4546                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4547         mutex_unlock(&trace_types_lock);
4548
4549         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4550 }
4551
4552 static ssize_t
4553 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4554                           size_t cnt, loff_t *ppos)
4555 {
4556         /*
4557          * There is no need to read what the user has written; this function
4558          * just makes sure that using "echo" on this file does not fail.
4559          */
4560
4561         *ppos += cnt;
4562
4563         return cnt;
4564 }
4565
4566 static int
4567 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4568 {
4569         struct trace_array *tr = inode->i_private;
4570
4571         /* disable tracing ? */
4572         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4573                 tracer_tracing_off(tr);
4574         /* resize the ring buffer to 0 */
4575         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4576
4577         trace_array_put(tr);
4578
4579         return 0;
4580 }
4581
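     /*
      * Write handler for trace_marker: inject a user-supplied string into the
      * ring buffer as a TRACE_PRINT entry. For example (the path assumes the
      * tracing directory is mounted in its usual debugfs location):
      *   echo "hello" > /sys/kernel/debug/tracing/trace_marker
      */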
4582 static ssize_t
4583 tracing_mark_write(struct file *filp, const char __user *ubuf,
4584                                         size_t cnt, loff_t *fpos)
4585 {
4586         unsigned long addr = (unsigned long)ubuf;
4587         struct trace_array *tr = filp->private_data;
4588         struct ring_buffer_event *event;
4589         struct ring_buffer *buffer;
4590         struct print_entry *entry;
4591         unsigned long irq_flags;
4592         struct page *pages[2];
4593         void *map_page[2];
4594         int nr_pages = 1;
4595         ssize_t written;
4596         int offset;
4597         int size;
4598         int len;
4599         int ret;
4600         int i;
4601
4602         if (tracing_disabled)
4603                 return -EINVAL;
4604
4605         if (!(trace_flags & TRACE_ITER_MARKERS))
4606                 return -EINVAL;
4607
4608         if (cnt > TRACE_BUF_SIZE)
4609                 cnt = TRACE_BUF_SIZE;
4610
4611         /*
4612          * Userspace is injecting traces into the kernel trace buffer.
4613          * We want to be as non-intrusive as possible.
4614          * To do so, we do not want to allocate any special buffers
4615          * or take any locks, but instead write the userspace data
4616          * straight into the ring buffer.
4617          *
4618          * First we need to pin the userspace buffer into memory,
4619          * which it most likely already is, because userspace just
4620          * referenced it. But there's no guarantee of that. By using get_user_pages_fast()
4621          * and kmap_atomic/kunmap_atomic() we can get access to the
4622          * pages directly. We then write the data directly into the
4623          * ring buffer.
4624          */
4625         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4626
4627         /* check if we cross pages */
4628         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4629                 nr_pages = 2;
4630
4631         offset = addr & (PAGE_SIZE - 1);
4632         addr &= PAGE_MASK;
4633
4634         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4635         if (ret < nr_pages) {
4636                 while (--ret >= 0)
4637                         put_page(pages[ret]);
4638                 written = -EFAULT;
4639                 goto out;
4640         }
4641
4642         for (i = 0; i < nr_pages; i++)
4643                 map_page[i] = kmap_atomic(pages[i]);
4644
4645         local_save_flags(irq_flags);
4646         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4647         buffer = tr->trace_buffer.buffer;
4648         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4649                                           irq_flags, preempt_count());
4650         if (!event) {
4651                 /* Ring buffer disabled, return as if not open for write */
4652                 written = -EBADF;
4653                 goto out_unlock;
4654         }
4655
4656         entry = ring_buffer_event_data(event);
4657         entry->ip = _THIS_IP_;
4658
4659         if (nr_pages == 2) {
4660                 len = PAGE_SIZE - offset;
4661                 memcpy(&entry->buf, map_page[0] + offset, len);
4662                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4663         } else
4664                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4665
4666         if (entry->buf[cnt - 1] != '\n') {
4667                 entry->buf[cnt] = '\n';
4668                 entry->buf[cnt + 1] = '\0';
4669         } else
4670                 entry->buf[cnt] = '\0';
4671
4672         __buffer_unlock_commit(buffer, event);
4673
4674         written = cnt;
4675
4676         *fpos += written;
4677
4678  out_unlock:
4679         for (i = 0; i < nr_pages; i++){
4680                 kunmap_atomic(map_page[i]);
4681                 put_page(pages[i]);
4682         }
4683  out:
4684         return written;
4685 }
4686
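     /* Show the available trace clocks, with the currently selected one in brackets. */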
4687 static int tracing_clock_show(struct seq_file *m, void *v)
4688 {
4689         struct trace_array *tr = m->private;
4690         int i;
4691
4692         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4693                 seq_printf(m,
4694                         "%s%s%s%s", i ? " " : "",
4695                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4696                         i == tr->clock_id ? "]" : "");
4697         seq_putc(m, '\n');
4698
4699         return 0;
4700 }
4701
4702 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4703                                    size_t cnt, loff_t *fpos)
4704 {
4705         struct seq_file *m = filp->private_data;
4706         struct trace_array *tr = m->private;
4707         char buf[64];
4708         const char *clockstr;
4709         int i;
4710
4711         if (cnt >= sizeof(buf))
4712                 return -EINVAL;
4713
4714         if (copy_from_user(&buf, ubuf, cnt))
4715                 return -EFAULT;
4716
4717         buf[cnt] = 0;
4718
4719         clockstr = strstrip(buf);
4720
4721         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4722                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4723                         break;
4724         }
4725         if (i == ARRAY_SIZE(trace_clocks))
4726                 return -EINVAL;
4727
4728         mutex_lock(&trace_types_lock);
4729
4730         tr->clock_id = i;
4731
4732         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4733
4734         /*
4735          * New clock may not be consistent with the previous clock.
4736          * Reset the buffer so that it doesn't have incomparable timestamps.
4737          */
4738         tracing_reset_online_cpus(&tr->trace_buffer);
4739
4740 #ifdef CONFIG_TRACER_MAX_TRACE
4741         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4742                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4743         tracing_reset_online_cpus(&tr->max_buffer);
4744 #endif
4745
4746         mutex_unlock(&trace_types_lock);
4747
4748         *fpos += cnt;
4749
4750         return cnt;
4751 }
4752
4753 static int tracing_clock_open(struct inode *inode, struct file *file)
4754 {
4755         struct trace_array *tr = inode->i_private;
4756         int ret;
4757
4758         if (tracing_disabled)
4759                 return -ENODEV;
4760
4761         if (trace_array_get(tr))
4762                 return -ENODEV;
4763
4764         ret = single_open(file, tracing_clock_show, inode->i_private);
4765         if (ret < 0)
4766                 trace_array_put(tr);
4767
4768         return ret;
4769 }
4770
4771 struct ftrace_buffer_info {
4772         struct trace_iterator   iter;
4773         void                    *spare;
4774         unsigned int            read;
4775 };
4776
4777 #ifdef CONFIG_TRACER_SNAPSHOT
4778 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4779 {
4780         struct trace_array *tr = inode->i_private;
4781         struct trace_iterator *iter;
4782         struct seq_file *m;
4783         int ret = 0;
4784
4785         if (trace_array_get(tr) < 0)
4786                 return -ENODEV;
4787
4788         if (file->f_mode & FMODE_READ) {
4789                 iter = __tracing_open(inode, file, true);
4790                 if (IS_ERR(iter))
4791                         ret = PTR_ERR(iter);
4792         } else {
4793                 /* Writes still need the seq_file to hold the private data */
4794                 ret = -ENOMEM;
4795                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4796                 if (!m)
4797                         goto out;
4798                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4799                 if (!iter) {
4800                         kfree(m);
4801                         goto out;
4802                 }
4803                 ret = 0;
4804
4805                 iter->tr = tr;
4806                 iter->trace_buffer = &tr->max_buffer;
4807                 iter->cpu_file = tracing_get_cpu(inode);
4808                 m->private = iter;
4809                 file->private_data = m;
4810         }
4811 out:
4812         if (ret < 0)
4813                 trace_array_put(tr);
4814
4815         return ret;
4816 }
4817
4818 static ssize_t
4819 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4820                        loff_t *ppos)
4821 {
4822         struct seq_file *m = filp->private_data;
4823         struct trace_iterator *iter = m->private;
4824         struct trace_array *tr = iter->tr;
4825         unsigned long val;
4826         int ret;
4827
4828         ret = tracing_update_buffers();
4829         if (ret < 0)
4830                 return ret;
4831
4832         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4833         if (ret)
4834                 return ret;
4835
4836         mutex_lock(&trace_types_lock);
4837
4838         if (tr->current_trace->use_max_tr) {
4839                 ret = -EBUSY;
4840                 goto out;
4841         }
4842
4843         switch (val) {
4844         case 0:
4845                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4846                         ret = -EINVAL;
4847                         break;
4848                 }
4849                 if (tr->allocated_snapshot)
4850                         free_snapshot(tr);
4851                 break;
4852         case 1:
4853 /* Only allow per-cpu swap if the ring buffer supports it */
4854 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4855                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4856                         ret = -EINVAL;
4857                         break;
4858                 }
4859 #endif
4860                 if (!tr->allocated_snapshot) {
4861                         ret = alloc_snapshot(tr);
4862                         if (ret < 0)
4863                                 break;
4864                 }
4865                 local_irq_disable();
4866                 /* Now, we're going to swap */
4867                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4868                         update_max_tr(tr, current, smp_processor_id());
4869                 else
4870                         update_max_tr_single(tr, current, iter->cpu_file);
4871                 local_irq_enable();
4872                 break;
4873         default:
4874                 if (tr->allocated_snapshot) {
4875                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4876                                 tracing_reset_online_cpus(&tr->max_buffer);
4877                         else
4878                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4879                 }
4880                 break;
4881         }
4882
4883         if (ret >= 0) {
4884                 *ppos += cnt;
4885                 ret = cnt;
4886         }
4887 out:
4888         mutex_unlock(&trace_types_lock);
4889         return ret;
4890 }
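
/*
 * Summary of the write semantics implemented above for the
 * "snapshot" file: writing 0 frees the snapshot buffer (only allowed
 * on the all-CPUs file), writing 1 allocates the snapshot buffer if
 * needed and swaps it with the live buffer (per-CPU swaps only when
 * the ring buffer supports them), and any other value clears the
 * snapshot contents without freeing the buffer.
 */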
4891
4892 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4893 {
4894         struct seq_file *m = file->private_data;
4895         int ret;
4896
4897         ret = tracing_release(inode, file);
4898
4899         if (file->f_mode & FMODE_READ)
4900                 return ret;
4901
4902         /* If write only, the seq_file is just a stub */
4903         if (m)
4904                 kfree(m->private);
4905         kfree(m);
4906
4907         return 0;
4908 }
4909
4910 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4911 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4912                                     size_t count, loff_t *ppos);
4913 static int tracing_buffers_release(struct inode *inode, struct file *file);
4914 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4915                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4916
4917 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4918 {
4919         struct ftrace_buffer_info *info;
4920         int ret;
4921
4922         ret = tracing_buffers_open(inode, filp);
4923         if (ret < 0)
4924                 return ret;
4925
4926         info = filp->private_data;
4927
4928         if (info->iter.trace->use_max_tr) {
4929                 tracing_buffers_release(inode, filp);
4930                 return -EBUSY;
4931         }
4932
4933         info->iter.snapshot = true;
4934         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4935
4936         return ret;
4937 }
4938
4939 #endif /* CONFIG_TRACER_SNAPSHOT */
4940
4941
4942 static const struct file_operations tracing_max_lat_fops = {
4943         .open           = tracing_open_generic,
4944         .read           = tracing_max_lat_read,
4945         .write          = tracing_max_lat_write,
4946         .llseek         = generic_file_llseek,
4947 };
4948
4949 static const struct file_operations set_tracer_fops = {
4950         .open           = tracing_open_generic,
4951         .read           = tracing_set_trace_read,
4952         .write          = tracing_set_trace_write,
4953         .llseek         = generic_file_llseek,
4954 };
4955
4956 static const struct file_operations tracing_pipe_fops = {
4957         .open           = tracing_open_pipe,
4958         .poll           = tracing_poll_pipe,
4959         .read           = tracing_read_pipe,
4960         .splice_read    = tracing_splice_read_pipe,
4961         .release        = tracing_release_pipe,
4962         .llseek         = no_llseek,
4963 };
4964
4965 static const struct file_operations tracing_entries_fops = {
4966         .open           = tracing_open_generic_tr,
4967         .read           = tracing_entries_read,
4968         .write          = tracing_entries_write,
4969         .llseek         = generic_file_llseek,
4970         .release        = tracing_release_generic_tr,
4971 };
4972
4973 static const struct file_operations tracing_total_entries_fops = {
4974         .open           = tracing_open_generic_tr,
4975         .read           = tracing_total_entries_read,
4976         .llseek         = generic_file_llseek,
4977         .release        = tracing_release_generic_tr,
4978 };
4979
4980 static const struct file_operations tracing_free_buffer_fops = {
4981         .open           = tracing_open_generic_tr,
4982         .write          = tracing_free_buffer_write,
4983         .release        = tracing_free_buffer_release,
4984 };
4985
4986 static const struct file_operations tracing_mark_fops = {
4987         .open           = tracing_open_generic_tr,
4988         .write          = tracing_mark_write,
4989         .llseek         = generic_file_llseek,
4990         .release        = tracing_release_generic_tr,
4991 };
4992
4993 static const struct file_operations trace_clock_fops = {
4994         .open           = tracing_clock_open,
4995         .read           = seq_read,
4996         .llseek         = seq_lseek,
4997         .release        = tracing_single_release_tr,
4998         .write          = tracing_clock_write,
4999 };
5000
5001 #ifdef CONFIG_TRACER_SNAPSHOT
5002 static const struct file_operations snapshot_fops = {
5003         .open           = tracing_snapshot_open,
5004         .read           = seq_read,
5005         .write          = tracing_snapshot_write,
5006         .llseek         = tracing_lseek,
5007         .release        = tracing_snapshot_release,
5008 };
5009
5010 static const struct file_operations snapshot_raw_fops = {
5011         .open           = snapshot_raw_open,
5012         .read           = tracing_buffers_read,
5013         .release        = tracing_buffers_release,
5014         .splice_read    = tracing_buffers_splice_read,
5015         .llseek         = no_llseek,
5016 };
5017
5018 #endif /* CONFIG_TRACER_SNAPSHOT */
5019
5020 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5021 {
5022         struct trace_array *tr = inode->i_private;
5023         struct ftrace_buffer_info *info;
5024         int ret;
5025
5026         if (tracing_disabled)
5027                 return -ENODEV;
5028
5029         if (trace_array_get(tr) < 0)
5030                 return -ENODEV;
5031
5032         info = kzalloc(sizeof(*info), GFP_KERNEL);
5033         if (!info) {
5034                 trace_array_put(tr);
5035                 return -ENOMEM;
5036         }
5037
5038         mutex_lock(&trace_types_lock);
5039
5040         info->iter.tr           = tr;
5041         info->iter.cpu_file     = tracing_get_cpu(inode);
5042         info->iter.trace        = tr->current_trace;
5043         info->iter.trace_buffer = &tr->trace_buffer;
5044         info->spare             = NULL;
5045         /* Force reading ring buffer for first read */
5046         info->read              = (unsigned int)-1;
5047
5048         filp->private_data = info;
5049
5050         mutex_unlock(&trace_types_lock);
5051
5052         ret = nonseekable_open(inode, filp);
5053         if (ret < 0)
5054                 trace_array_put(tr);
5055
5056         return ret;
5057 }
5058
5059 static unsigned int
5060 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5061 {
5062         struct ftrace_buffer_info *info = filp->private_data;
5063         struct trace_iterator *iter = &info->iter;
5064
5065         return trace_poll(iter, filp, poll_table);
5066 }
5067
5068 static ssize_t
5069 tracing_buffers_read(struct file *filp, char __user *ubuf,
5070                      size_t count, loff_t *ppos)
5071 {
5072         struct ftrace_buffer_info *info = filp->private_data;
5073         struct trace_iterator *iter = &info->iter;
5074         ssize_t ret;
5075         ssize_t size;
5076
5077         if (!count)
5078                 return 0;
5079
5080         mutex_lock(&trace_types_lock);
5081
5082 #ifdef CONFIG_TRACER_MAX_TRACE
5083         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5084                 size = -EBUSY;
5085                 goto out_unlock;
5086         }
5087 #endif
5088
5089         if (!info->spare)
5090                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5091                                                           iter->cpu_file);
5092         size = -ENOMEM;
5093         if (!info->spare)
5094                 goto out_unlock;
5095
5096         /* Do we have previous read data to read? */
5097         if (info->read < PAGE_SIZE)
5098                 goto read;
5099
5100  again:
5101         trace_access_lock(iter->cpu_file);
5102         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5103                                     &info->spare,
5104                                     count,
5105                                     iter->cpu_file, 0);
5106         trace_access_unlock(iter->cpu_file);
5107
5108         if (ret < 0) {
5109                 if (trace_empty(iter)) {
5110                         if ((filp->f_flags & O_NONBLOCK)) {
5111                                 size = -EAGAIN;
5112                                 goto out_unlock;
5113                         }
5114                         mutex_unlock(&trace_types_lock);
5115                         iter->trace->wait_pipe(iter);
5116                         mutex_lock(&trace_types_lock);
5117                         if (signal_pending(current)) {
5118                                 size = -EINTR;
5119                                 goto out_unlock;
5120                         }
5121                         goto again;
5122                 }
5123                 size = 0;
5124                 goto out_unlock;
5125         }
5126
5127         info->read = 0;
5128  read:
5129         size = PAGE_SIZE - info->read;
5130         if (size > count)
5131                 size = count;
5132
5133         ret = copy_to_user(ubuf, info->spare + info->read, size);
5134         if (ret == size) {
5135                 size = -EFAULT;
5136                 goto out_unlock;
5137         }
5138         size -= ret;
5139
5140         *ppos += size;
5141         info->read += size;
5142
5143  out_unlock:
5144         mutex_unlock(&trace_types_lock);
5145
5146         return size;
5147 }
5148
5149 static int tracing_buffers_release(struct inode *inode, struct file *file)
5150 {
5151         struct ftrace_buffer_info *info = file->private_data;
5152         struct trace_iterator *iter = &info->iter;
5153
5154         mutex_lock(&trace_types_lock);
5155
5156         __trace_array_put(iter->tr);
5157
5158         if (info->spare)
5159                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5160         kfree(info);
5161
5162         mutex_unlock(&trace_types_lock);
5163
5164         return 0;
5165 }
5166
5167 struct buffer_ref {
5168         struct ring_buffer      *buffer;
5169         void                    *page;
5170         int                     ref;
5171 };
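
/*
 * Each ring-buffer page handed to splice is wrapped in a buffer_ref.
 * The reference is dropped either when the pipe buffer is released
 * or, on error, by buffer_spd_release() below; the final put returns
 * the page to the ring buffer and frees the wrapper.
 */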
5172
5173 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5174                                     struct pipe_buffer *buf)
5175 {
5176         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5177
5178         if (--ref->ref)
5179                 return;
5180
5181         ring_buffer_free_read_page(ref->buffer, ref->page);
5182         kfree(ref);
5183         buf->private = 0;
5184 }
5185
5186 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5187                                 struct pipe_buffer *buf)
5188 {
5189         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5190
5191         ref->ref++;
5192 }
5193
5194 /* Pipe buffer operations for a buffer. */
5195 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5196         .can_merge              = 0,
5197         .map                    = generic_pipe_buf_map,
5198         .unmap                  = generic_pipe_buf_unmap,
5199         .confirm                = generic_pipe_buf_confirm,
5200         .release                = buffer_pipe_buf_release,
5201         .steal                  = generic_pipe_buf_steal,
5202         .get                    = buffer_pipe_buf_get,
5203 };
5204
5205 /*
5206  * Callback from splice_to_pipe(); releases the pages still held by
5207  * the spd if we errored out while filling the pipe.
5208  */
5209 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5210 {
5211         struct buffer_ref *ref =
5212                 (struct buffer_ref *)spd->partial[i].private;
5213
5214         if (--ref->ref)
5215                 return;
5216
5217         ring_buffer_free_read_page(ref->buffer, ref->page);
5218         kfree(ref);
5219         spd->partial[i].private = 0;
5220 }
5221
5222 static ssize_t
5223 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5224                             struct pipe_inode_info *pipe, size_t len,
5225                             unsigned int flags)
5226 {
5227         struct ftrace_buffer_info *info = file->private_data;
5228         struct trace_iterator *iter = &info->iter;
5229         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5230         struct page *pages_def[PIPE_DEF_BUFFERS];
5231         struct splice_pipe_desc spd = {
5232                 .pages          = pages_def,
5233                 .partial        = partial_def,
5234                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5235                 .flags          = flags,
5236                 .ops            = &buffer_pipe_buf_ops,
5237                 .spd_release    = buffer_spd_release,
5238         };
5239         struct buffer_ref *ref;
5240         int entries, size, i;
5241         ssize_t ret;
5242
5243         mutex_lock(&trace_types_lock);
5244
5245 #ifdef CONFIG_TRACER_MAX_TRACE
5246         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5247                 ret = -EBUSY;
5248                 goto out;
5249         }
5250 #endif
5251
5252         if (splice_grow_spd(pipe, &spd)) {
5253                 ret = -ENOMEM;
5254                 goto out;
5255         }
5256
5257         if (*ppos & (PAGE_SIZE - 1)) {
5258                 ret = -EINVAL;
5259                 goto out;
5260         }
5261
5262         if (len & (PAGE_SIZE - 1)) {
5263                 if (len < PAGE_SIZE) {
5264                         ret = -EINVAL;
5265                         goto out;
5266                 }
5267                 len &= PAGE_MASK;
5268         }
5269
5270  again:
5271         trace_access_lock(iter->cpu_file);
5272         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5273
5274         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5275                 struct page *page;
5276                 int r;
5277
5278                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5279                 if (!ref)
5280                         break;
5281
5282                 ref->ref = 1;
5283                 ref->buffer = iter->trace_buffer->buffer;
5284                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5285                 if (!ref->page) {
5286                         kfree(ref);
5287                         break;
5288                 }
5289
5290                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5291                                           len, iter->cpu_file, 1);
5292                 if (r < 0) {
5293                         ring_buffer_free_read_page(ref->buffer, ref->page);
5294                         kfree(ref);
5295                         break;
5296                 }
5297
5298                 /*
5299                  * Zero out any leftover data; this page is going
5300                  * to user land.
5301                  */
5302                 size = ring_buffer_page_len(ref->page);
5303                 if (size < PAGE_SIZE)
5304                         memset(ref->page + size, 0, PAGE_SIZE - size);
5305
5306                 page = virt_to_page(ref->page);
5307
5308                 spd.pages[i] = page;
5309                 spd.partial[i].len = PAGE_SIZE;
5310                 spd.partial[i].offset = 0;
5311                 spd.partial[i].private = (unsigned long)ref;
5312                 spd.nr_pages++;
5313                 *ppos += PAGE_SIZE;
5314
5315                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5316         }
5317
5318         trace_access_unlock(iter->cpu_file);
5319         spd.nr_pages = i;
5320
5321         /* did we read anything? */
5322         if (!spd.nr_pages) {
5323                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5324                         ret = -EAGAIN;
5325                         goto out;
5326                 }
5327                 mutex_unlock(&trace_types_lock);
5328                 iter->trace->wait_pipe(iter);
5329                 mutex_lock(&trace_types_lock);
5330                 if (signal_pending(current)) {
5331                         ret = -EINTR;
5332                         goto out;
5333                 }
5334                 goto again;
5335         }
5336
5337         ret = splice_to_pipe(pipe, &spd);
5338         splice_shrink_spd(&spd);
5339 out:
5340         mutex_unlock(&trace_types_lock);
5341
5342         return ret;
5343 }
5344
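/*
 * These operations back the per-CPU trace_pipe_raw files: read()
 * copies out one ring-buffer page at a time through a spare page,
 * while splice_read() hands whole pages to a pipe so user space can
 * consume the binary ring-buffer format with minimal copying.
 */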
5345 static const struct file_operations tracing_buffers_fops = {
5346         .open           = tracing_buffers_open,
5347         .read           = tracing_buffers_read,
5348         .poll           = tracing_buffers_poll,
5349         .release        = tracing_buffers_release,
5350         .splice_read    = tracing_buffers_splice_read,
5351         .llseek         = no_llseek,
5352 };
5353
5354 static ssize_t
5355 tracing_stats_read(struct file *filp, char __user *ubuf,
5356                    size_t count, loff_t *ppos)
5357 {
5358         struct inode *inode = file_inode(filp);
5359         struct trace_array *tr = inode->i_private;
5360         struct trace_buffer *trace_buf = &tr->trace_buffer;
5361         int cpu = tracing_get_cpu(inode);
5362         struct trace_seq *s;
5363         unsigned long cnt;
5364         unsigned long long t;
5365         unsigned long usec_rem;
5366
5367         s = kmalloc(sizeof(*s), GFP_KERNEL);
5368         if (!s)
5369                 return -ENOMEM;
5370
5371         trace_seq_init(s);
5372
5373         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5374         trace_seq_printf(s, "entries: %ld\n", cnt);
5375
5376         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5377         trace_seq_printf(s, "overrun: %ld\n", cnt);
5378
5379         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5380         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5381
5382         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5383         trace_seq_printf(s, "bytes: %ld\n", cnt);
5384
5385         if (trace_clocks[tr->clock_id].in_ns) {
5386                 /* local or global for trace_clock */
5387                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5388                 usec_rem = do_div(t, USEC_PER_SEC);
5389                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5390                                                                 t, usec_rem);
5391
5392                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5393                 usec_rem = do_div(t, USEC_PER_SEC);
5394                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5395         } else {
5396                 /* counter or tsc mode for trace_clock */
5397                 trace_seq_printf(s, "oldest event ts: %llu\n",
5398                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5399
5400                 trace_seq_printf(s, "now ts: %llu\n",
5401                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5402         }
5403
5404         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5405         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5406
5407         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5408         trace_seq_printf(s, "read events: %ld\n", cnt);
5409
5410         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5411
5412         kfree(s);
5413
5414         return count;
5415 }
5416
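/*
 * Reading a per-CPU "stats" file produces one "name: value" line per
 * counter taken from that CPU's ring buffer: entries, overrun,
 * commit overrun, bytes, the oldest and current timestamps (shown in
 * seconds when the trace clock counts in nanoseconds), dropped
 * events and read events.
 */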
5417 static const struct file_operations tracing_stats_fops = {
5418         .open           = tracing_open_generic_tr,
5419         .read           = tracing_stats_read,
5420         .llseek         = generic_file_llseek,
5421         .release        = tracing_release_generic_tr,
5422 };
5423
5424 #ifdef CONFIG_DYNAMIC_FTRACE
5425
5426 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5427 {
5428         return 0;
5429 }
5430
5431 static ssize_t
5432 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5433                   size_t cnt, loff_t *ppos)
5434 {
5435         static char ftrace_dyn_info_buffer[1024];
5436         static DEFINE_MUTEX(dyn_info_mutex);
5437         unsigned long *p = filp->private_data;
5438         char *buf = ftrace_dyn_info_buffer;
5439         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5440         int r;
5441
5442         mutex_lock(&dyn_info_mutex);
5443         r = sprintf(buf, "%ld ", *p);
5444
5445         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5446         buf[r++] = '\n';
5447
5448         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5449
5450         mutex_unlock(&dyn_info_mutex);
5451
5452         return r;
5453 }
5454
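/*
 * "dyn_ftrace_total_info" simply formats the counter passed in as
 * file private data (ftrace_update_tot_cnt, wired up in
 * tracer_init_debugfs() below), followed by any architecture
 * specific details from ftrace_arch_read_dyn_info().
 */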
5455 static const struct file_operations tracing_dyn_info_fops = {
5456         .open           = tracing_open_generic,
5457         .read           = tracing_read_dyn_info,
5458         .llseek         = generic_file_llseek,
5459 };
5460 #endif /* CONFIG_DYNAMIC_FTRACE */
5461
5462 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5463 static void
5464 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5465 {
5466         tracing_snapshot();
5467 }
5468
5469 static void
5470 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5471 {
5472         unsigned long *count = (unsigned long *)data;
5473
5474         if (!*count)
5475                 return;
5476
5477         if (*count != -1)
5478                 (*count)--;
5479
5480         tracing_snapshot();
5481 }
5482
5483 static int
5484 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5485                       struct ftrace_probe_ops *ops, void *data)
5486 {
5487         long count = (long)data;
5488
5489         seq_printf(m, "%ps:", (void *)ip);
5490
5491         seq_printf(m, "snapshot");
5492
5493         if (count == -1)
5494                 seq_printf(m, ":unlimited\n");
5495         else
5496                 seq_printf(m, ":count=%ld\n", count);
5497
5498         return 0;
5499 }
5500
5501 static struct ftrace_probe_ops snapshot_probe_ops = {
5502         .func                   = ftrace_snapshot,
5503         .print                  = ftrace_snapshot_print,
5504 };
5505
5506 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5507         .func                   = ftrace_count_snapshot,
5508         .print                  = ftrace_snapshot_print,
5509 };
5510
5511 static int
5512 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5513                                char *glob, char *cmd, char *param, int enable)
5514 {
5515         struct ftrace_probe_ops *ops;
5516         void *count = (void *)-1;
5517         char *number;
5518         int ret;
5519
5520         /* hash funcs only work with set_ftrace_filter */
5521         if (!enable)
5522                 return -EINVAL;
5523
5524         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5525
5526         if (glob[0] == '!') {
5527                 unregister_ftrace_function_probe_func(glob+1, ops);
5528                 return 0;
5529         }
5530
5531         if (!param)
5532                 goto out_reg;
5533
5534         number = strsep(&param, ":");
5535
5536         if (!strlen(number))
5537                 goto out_reg;
5538
5539         /*
5540          * We use the callback data field (which is a pointer)
5541          * as our counter.
5542          */
5543         ret = kstrtoul(number, 0, (unsigned long *)&count);
5544         if (ret)
5545                 return ret;
5546
5547  out_reg:
5548         ret = register_ftrace_function_probe(glob, ops, count);
5549
5550         if (ret >= 0)
5551                 alloc_snapshot(&global_trace);
5552
5553         return ret < 0 ? ret : 0;
5554 }
5555
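/*
 * The callback above implements the "snapshot" command accepted by
 * set_ftrace_filter. Assuming the usual debugfs mount point and a
 * placeholder function name, a probe that snapshots the first five
 * hits can be installed with:
 *
 *   # echo 'some_function:snapshot:5' > \
 *         /sys/kernel/debug/tracing/set_ftrace_filter
 *
 * Omitting the count snapshots on every hit, and prefixing the glob
 * with '!' removes the probe again.
 */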
5556 static struct ftrace_func_command ftrace_snapshot_cmd = {
5557         .name                   = "snapshot",
5558         .func                   = ftrace_trace_snapshot_callback,
5559 };
5560
5561 static __init int register_snapshot_cmd(void)
5562 {
5563         return register_ftrace_command(&ftrace_snapshot_cmd);
5564 }
5565 #else
5566 static inline __init int register_snapshot_cmd(void) { return 0; }
5567 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5568
5569 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5570 {
5571         if (tr->dir)
5572                 return tr->dir;
5573
5574         if (!debugfs_initialized())
5575                 return NULL;
5576
5577         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5578                 tr->dir = debugfs_create_dir("tracing", NULL);
5579
5580         if (!tr->dir)
5581                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5582
5583         return tr->dir;
5584 }
5585
5586 struct dentry *tracing_init_dentry(void)
5587 {
5588         return tracing_init_dentry_tr(&global_trace);
5589 }
5590
5591 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5592 {
5593         struct dentry *d_tracer;
5594
5595         if (tr->percpu_dir)
5596                 return tr->percpu_dir;
5597
5598         d_tracer = tracing_init_dentry_tr(tr);
5599         if (!d_tracer)
5600                 return NULL;
5601
5602         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5603
5604         WARN_ONCE(!tr->percpu_dir,
5605                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5606
5607         return tr->percpu_dir;
5608 }
5609
5610 static struct dentry *
5611 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5612                       void *data, long cpu, const struct file_operations *fops)
5613 {
5614         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5615
5616         if (ret) /* See tracing_get_cpu() */
5617                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5618         return ret;
5619 }
5620
5621 static void
5622 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5623 {
5624         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5625         struct dentry *d_cpu;
5626         char cpu_dir[30]; /* 30 characters should be more than enough */
5627
5628         if (!d_percpu)
5629                 return;
5630
5631         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5632         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5633         if (!d_cpu) {
5634                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5635                 return;
5636         }
5637
5638         /* per cpu trace_pipe */
5639         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5640                                 tr, cpu, &tracing_pipe_fops);
5641
5642         /* per cpu trace */
5643         trace_create_cpu_file("trace", 0644, d_cpu,
5644                                 tr, cpu, &tracing_fops);
5645
5646         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5647                                 tr, cpu, &tracing_buffers_fops);
5648
5649         trace_create_cpu_file("stats", 0444, d_cpu,
5650                                 tr, cpu, &tracing_stats_fops);
5651
5652         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5653                                 tr, cpu, &tracing_entries_fops);
5654
5655 #ifdef CONFIG_TRACER_SNAPSHOT
5656         trace_create_cpu_file("snapshot", 0644, d_cpu,
5657                                 tr, cpu, &snapshot_fops);
5658
5659         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5660                                 tr, cpu, &snapshot_raw_fops);
5661 #endif
5662 }
5663
5664 #ifdef CONFIG_FTRACE_SELFTEST
5665 /* Let selftest have access to static functions in this file */
5666 #include "trace_selftest.c"
5667 #endif
5668
5669 struct trace_option_dentry {
5670         struct tracer_opt               *opt;
5671         struct tracer_flags             *flags;
5672         struct trace_array              *tr;
5673         struct dentry                   *entry;
5674 };
5675
5676 static ssize_t
5677 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5678                         loff_t *ppos)
5679 {
5680         struct trace_option_dentry *topt = filp->private_data;
5681         char *buf;
5682
5683         if (topt->flags->val & topt->opt->bit)
5684                 buf = "1\n";
5685         else
5686                 buf = "0\n";
5687
5688         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5689 }
5690
5691 static ssize_t
5692 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5693                          loff_t *ppos)
5694 {
5695         struct trace_option_dentry *topt = filp->private_data;
5696         unsigned long val;
5697         int ret;
5698
5699         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5700         if (ret)
5701                 return ret;
5702
5703         if (val != 0 && val != 1)
5704                 return -EINVAL;
5705
5706         if (!!(topt->flags->val & topt->opt->bit) != val) {
5707                 mutex_lock(&trace_types_lock);
5708                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5709                                           topt->opt, !val);
5710                 mutex_unlock(&trace_types_lock);
5711                 if (ret)
5712                         return ret;
5713         }
5714
5715         *ppos += cnt;
5716
5717         return cnt;
5718 }
5719
5720
5721 static const struct file_operations trace_options_fops = {
5722         .open = tracing_open_generic,
5723         .read = trace_options_read,
5724         .write = trace_options_write,
5725         .llseek = generic_file_llseek,
5726 };
5727
5728 static ssize_t
5729 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5730                         loff_t *ppos)
5731 {
5732         long index = (long)filp->private_data;
5733         char *buf;
5734
5735         if (trace_flags & (1 << index))
5736                 buf = "1\n";
5737         else
5738                 buf = "0\n";
5739
5740         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5741 }
5742
5743 static ssize_t
5744 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5745                          loff_t *ppos)
5746 {
5747         struct trace_array *tr = &global_trace;
5748         long index = (long)filp->private_data;
5749         unsigned long val;
5750         int ret;
5751
5752         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5753         if (ret)
5754                 return ret;
5755
5756         if (val != 0 && val != 1)
5757                 return -EINVAL;
5758
5759         mutex_lock(&trace_types_lock);
5760         ret = set_tracer_flag(tr, 1 << index, val);
5761         mutex_unlock(&trace_types_lock);
5762
5763         if (ret < 0)
5764                 return ret;
5765
5766         *ppos += cnt;
5767
5768         return cnt;
5769 }
5770
5771 static const struct file_operations trace_options_core_fops = {
5772         .open = tracing_open_generic,
5773         .read = trace_options_core_read,
5774         .write = trace_options_core_write,
5775         .llseek = generic_file_llseek,
5776 };
5777
5778 struct dentry *trace_create_file(const char *name,
5779                                  umode_t mode,
5780                                  struct dentry *parent,
5781                                  void *data,
5782                                  const struct file_operations *fops)
5783 {
5784         struct dentry *ret;
5785
5786         ret = debugfs_create_file(name, mode, parent, data, fops);
5787         if (!ret)
5788                 pr_warning("Could not create debugfs '%s' entry\n", name);
5789
5790         return ret;
5791 }
5792
5793
5794 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5795 {
5796         struct dentry *d_tracer;
5797
5798         if (tr->options)
5799                 return tr->options;
5800
5801         d_tracer = tracing_init_dentry_tr(tr);
5802         if (!d_tracer)
5803                 return NULL;
5804
5805         tr->options = debugfs_create_dir("options", d_tracer);
5806         if (!tr->options) {
5807                 pr_warning("Could not create debugfs directory 'options'\n");
5808                 return NULL;
5809         }
5810
5811         return tr->options;
5812 }
5813
5814 static void
5815 create_trace_option_file(struct trace_array *tr,
5816                          struct trace_option_dentry *topt,
5817                          struct tracer_flags *flags,
5818                          struct tracer_opt *opt)
5819 {
5820         struct dentry *t_options;
5821
5822         t_options = trace_options_init_dentry(tr);
5823         if (!t_options)
5824                 return;
5825
5826         topt->flags = flags;
5827         topt->opt = opt;
5828         topt->tr = tr;
5829
5830         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5831                                     &trace_options_fops);
5832
5833 }
5834
5835 static struct trace_option_dentry *
5836 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5837 {
5838         struct trace_option_dentry *topts;
5839         struct tracer_flags *flags;
5840         struct tracer_opt *opts;
5841         int cnt;
5842
5843         if (!tracer)
5844                 return NULL;
5845
5846         flags = tracer->flags;
5847
5848         if (!flags || !flags->opts)
5849                 return NULL;
5850
5851         opts = flags->opts;
5852
5853         for (cnt = 0; opts[cnt].name; cnt++)
5854                 ;
5855
5856         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5857         if (!topts)
5858                 return NULL;
5859
5860         for (cnt = 0; opts[cnt].name; cnt++)
5861                 create_trace_option_file(tr, &topts[cnt], flags,
5862                                          &opts[cnt]);
5863
5864         return topts;
5865 }
5866
5867 static void
5868 destroy_trace_option_files(struct trace_option_dentry *topts)
5869 {
5870         int cnt;
5871
5872         if (!topts)
5873                 return;
5874
5875         for (cnt = 0; topts[cnt].opt; cnt++) {
5876                 if (topts[cnt].entry)
5877                         debugfs_remove(topts[cnt].entry);
5878         }
5879
5880         kfree(topts);
5881 }
5882
5883 static struct dentry *
5884 create_trace_option_core_file(struct trace_array *tr,
5885                               const char *option, long index)
5886 {
5887         struct dentry *t_options;
5888
5889         t_options = trace_options_init_dentry(tr);
5890         if (!t_options)
5891                 return NULL;
5892
5893         return trace_create_file(option, 0644, t_options, (void *)index,
5894                                     &trace_options_core_fops);
5895 }
5896
5897 static __init void create_trace_options_dir(struct trace_array *tr)
5898 {
5899         struct dentry *t_options;
5900         int i;
5901
5902         t_options = trace_options_init_dentry(tr);
5903         if (!t_options)
5904                 return;
5905
5906         for (i = 0; trace_options[i]; i++)
5907                 create_trace_option_core_file(tr, trace_options[i], i);
5908 }
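
/*
 * The "options" directory built above exposes one boolean file per
 * core trace flag named in trace_options[]; reading a file returns
 * "0" or "1" and writing toggles the corresponding bit through
 * set_tracer_flag().
 */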
5909
5910 static ssize_t
5911 rb_simple_read(struct file *filp, char __user *ubuf,
5912                size_t cnt, loff_t *ppos)
5913 {
5914         struct trace_array *tr = filp->private_data;
5915         char buf[64];
5916         int r;
5917
5918         r = tracer_tracing_is_on(tr);
5919         r = sprintf(buf, "%d\n", r);
5920
5921         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5922 }
5923
5924 static ssize_t
5925 rb_simple_write(struct file *filp, const char __user *ubuf,
5926                 size_t cnt, loff_t *ppos)
5927 {
5928         struct trace_array *tr = filp->private_data;
5929         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5930         unsigned long val;
5931         int ret;
5932
5933         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5934         if (ret)
5935                 return ret;
5936
5937         if (buffer) {
5938                 mutex_lock(&trace_types_lock);
5939                 if (val) {
5940                         tracer_tracing_on(tr);
5941                         if (tr->current_trace->start)
5942                                 tr->current_trace->start(tr);
5943                 } else {
5944                         tracer_tracing_off(tr);
5945                         if (tr->current_trace->stop)
5946                                 tr->current_trace->stop(tr);
5947                 }
5948                 mutex_unlock(&trace_types_lock);
5949         }
5950
5951         (*ppos)++;
5952
5953         return cnt;
5954 }
5955
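/*
 * These operations back the "tracing_on" file: reading reports
 * whether the ring buffer is currently recording, and writing 0 or 1
 * stops or restarts recording (also calling the current tracer's
 * stop/start callbacks) without tearing the tracer down.
 */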
5956 static const struct file_operations rb_simple_fops = {
5957         .open           = tracing_open_generic_tr,
5958         .read           = rb_simple_read,
5959         .write          = rb_simple_write,
5960         .release        = tracing_release_generic_tr,
5961         .llseek         = default_llseek,
5962 };
5963
5964 struct dentry *trace_instance_dir;
5965
5966 static void
5967 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5968
5969 static int
5970 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5971 {
5972         enum ring_buffer_flags rb_flags;
5973
5974         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5975
5976         buf->tr = tr;
5977
5978         buf->buffer = ring_buffer_alloc(size, rb_flags);
5979         if (!buf->buffer)
5980                 return -ENOMEM;
5981
5982         buf->data = alloc_percpu(struct trace_array_cpu);
5983         if (!buf->data) {
5984                 ring_buffer_free(buf->buffer);
5985                 return -ENOMEM;
5986         }
5987
5988         /* Allocate the first page for all buffers */
5989         set_buffer_entries(&tr->trace_buffer,
5990                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5991
5992         return 0;
5993 }
5994
5995 static int allocate_trace_buffers(struct trace_array *tr, int size)
5996 {
5997         int ret;
5998
5999         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6000         if (ret)
6001                 return ret;
6002
6003 #ifdef CONFIG_TRACER_MAX_TRACE
6004         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6005                                     allocate_snapshot ? size : 1);
6006         if (WARN_ON(ret)) {
6007                 ring_buffer_free(tr->trace_buffer.buffer);
6008                 free_percpu(tr->trace_buffer.data);
6009                 return -ENOMEM;
6010         }
6011         tr->allocated_snapshot = allocate_snapshot;
6012
6013         /*
6014          * Only the top level trace array gets its snapshot allocated
6015          * from the kernel command line.
6016          */
6017         allocate_snapshot = false;
6018 #endif
6019         return 0;
6020 }
6021
6022 static int new_instance_create(const char *name)
6023 {
6024         struct trace_array *tr;
6025         int ret;
6026
6027         mutex_lock(&trace_types_lock);
6028
6029         ret = -EEXIST;
6030         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6031                 if (tr->name && strcmp(tr->name, name) == 0)
6032                         goto out_unlock;
6033         }
6034
6035         ret = -ENOMEM;
6036         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6037         if (!tr)
6038                 goto out_unlock;
6039
6040         tr->name = kstrdup(name, GFP_KERNEL);
6041         if (!tr->name)
6042                 goto out_free_tr;
6043
6044         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6045                 goto out_free_tr;
6046
6047         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6048
6049         raw_spin_lock_init(&tr->start_lock);
6050
6051         tr->current_trace = &nop_trace;
6052
6053         INIT_LIST_HEAD(&tr->systems);
6054         INIT_LIST_HEAD(&tr->events);
6055
6056         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6057                 goto out_free_tr;
6058
6059         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6060         if (!tr->dir)
6061                 goto out_free_tr;
6062
6063         ret = event_trace_add_tracer(tr->dir, tr);
6064         if (ret) {
6065                 debugfs_remove_recursive(tr->dir);
6066                 goto out_free_tr;
6067         }
6068
6069         init_tracer_debugfs(tr, tr->dir);
6070
6071         list_add(&tr->list, &ftrace_trace_arrays);
6072
6073         mutex_unlock(&trace_types_lock);
6074
6075         return 0;
6076
6077  out_free_tr:
6078         if (tr->trace_buffer.buffer)
6079                 ring_buffer_free(tr->trace_buffer.buffer);
6080         free_cpumask_var(tr->tracing_cpumask);
6081         kfree(tr->name);
6082         kfree(tr);
6083
6084  out_unlock:
6085         mutex_unlock(&trace_types_lock);
6086
6087         return ret;
6088
6089 }
6090
6091 static int instance_delete(const char *name)
6092 {
6093         struct trace_array *tr;
6094         int found = 0;
6095         int ret;
6096
6097         mutex_lock(&trace_types_lock);
6098
6099         ret = -ENODEV;
6100         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6101                 if (tr->name && strcmp(tr->name, name) == 0) {
6102                         found = 1;
6103                         break;
6104                 }
6105         }
6106         if (!found)
6107                 goto out_unlock;
6108
6109         ret = -EBUSY;
6110         if (tr->ref)
6111                 goto out_unlock;
6112
6113         list_del(&tr->list);
6114
6115         event_trace_del_tracer(tr);
6116         debugfs_remove_recursive(tr->dir);
6117         free_percpu(tr->trace_buffer.data);
6118         ring_buffer_free(tr->trace_buffer.buffer);
6119
6120         kfree(tr->name);
6121         kfree(tr);
6122
6123         ret = 0;
6124
6125  out_unlock:
6126         mutex_unlock(&trace_types_lock);
6127
6128         return ret;
6129 }
6130
6131 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6132 {
6133         struct dentry *parent;
6134         int ret;
6135
6136         /* Paranoid: Make sure the parent is the "instances" directory */
6137         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6138         if (WARN_ON_ONCE(parent != trace_instance_dir))
6139                 return -ENOENT;
6140
6141         /*
6142          * The inode mutex is locked, but debugfs_create_dir() will also
6143          * take the mutex. As the instances directory can not be destroyed
6144          * or changed in any other way, it is safe to unlock it, and
6145          * let the dentry try. If two users try to make the same dir at
6146          * the same time, then the new_instance_create() will determine the
6147          * winner.
6148          */
6149         mutex_unlock(&inode->i_mutex);
6150
6151         ret = new_instance_create(dentry->d_iname);
6152
6153         mutex_lock(&inode->i_mutex);
6154
6155         return ret;
6156 }
6157
6158 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6159 {
6160         struct dentry *parent;
6161         int ret;
6162
6163         /* Paranoid: Make sure the parent is the "instances" directory */
6164         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6165         if (WARN_ON_ONCE(parent != trace_instance_dir))
6166                 return -ENOENT;
6167
6168         /* The caller did a dget() on dentry */
6169         mutex_unlock(&dentry->d_inode->i_mutex);
6170
6171         /*
6172          * The inode mutex is locked, but debugfs_remove_recursive() will
6173          * also take the mutex. As the instances directory can not be
6174          * destroyed or changed in any other way, it is safe to unlock it,
6175          * and let the dentry try. If two users try to remove the same
6176          * instance at the same time, then instance_delete() will determine
6177          * the winner.
6178          */
6179         mutex_unlock(&inode->i_mutex);
6180
6181         ret = instance_delete(dentry->d_iname);
6182
6183         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6184         mutex_lock(&dentry->d_inode->i_mutex);
6185
6186         return ret;
6187 }
6188
6189 static const struct inode_operations instance_dir_inode_operations = {
6190         .lookup         = simple_lookup,
6191         .mkdir          = instance_mkdir,
6192         .rmdir          = instance_rmdir,
6193 };
6194
6195 static __init void create_trace_instances(struct dentry *d_tracer)
6196 {
6197         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6198         if (WARN_ON(!trace_instance_dir))
6199                 return;
6200
6201         /* Hijack the dir inode operations, to allow mkdir */
6202         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6203 }
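
/*
 * With the inode operations hijacked above, trace instances can be
 * created and removed from user space (path assumes the usual
 * debugfs mount point, "foo" is a placeholder name):
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * Each instance gets its own ring buffer and its own copies of the
 * control files created by init_tracer_debugfs() below.
 */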
6204
6205 static void
6206 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6207 {
6208         int cpu;
6209
6210         trace_create_file("tracing_cpumask", 0644, d_tracer,
6211                           tr, &tracing_cpumask_fops);
6212
6213         trace_create_file("trace_options", 0644, d_tracer,
6214                           tr, &tracing_iter_fops);
6215
6216         trace_create_file("trace", 0644, d_tracer,
6217                           tr, &tracing_fops);
6218
6219         trace_create_file("trace_pipe", 0444, d_tracer,
6220                           tr, &tracing_pipe_fops);
6221
6222         trace_create_file("buffer_size_kb", 0644, d_tracer,
6223                           tr, &tracing_entries_fops);
6224
6225         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6226                           tr, &tracing_total_entries_fops);
6227
6228         trace_create_file("free_buffer", 0200, d_tracer,
6229                           tr, &tracing_free_buffer_fops);
6230
6231         trace_create_file("trace_marker", 0220, d_tracer,
6232                           tr, &tracing_mark_fops);
6233
6234         trace_create_file("trace_clock", 0644, d_tracer, tr,
6235                           &trace_clock_fops);
6236
6237         trace_create_file("tracing_on", 0644, d_tracer,
6238                           tr, &rb_simple_fops);
6239
6240 #ifdef CONFIG_TRACER_SNAPSHOT
6241         trace_create_file("snapshot", 0644, d_tracer,
6242                           tr, &snapshot_fops);
6243 #endif
6244
6245         for_each_tracing_cpu(cpu)
6246                 tracing_init_debugfs_percpu(tr, cpu);
6247
6248 }
6249
6250 static __init int tracer_init_debugfs(void)
6251 {
6252         struct dentry *d_tracer;
6253
6254         trace_access_lock_init();
6255
6256         d_tracer = tracing_init_dentry();
6257         if (!d_tracer)
6258                 return 0;
6259
6260         init_tracer_debugfs(&global_trace, d_tracer);
6261
6262         trace_create_file("available_tracers", 0444, d_tracer,
6263                         &global_trace, &show_traces_fops);
6264
6265         trace_create_file("current_tracer", 0644, d_tracer,
6266                         &global_trace, &set_tracer_fops);
6267
6268 #ifdef CONFIG_TRACER_MAX_TRACE
6269         trace_create_file("tracing_max_latency", 0644, d_tracer,
6270                         &tracing_max_latency, &tracing_max_lat_fops);
6271 #endif
6272
6273         trace_create_file("tracing_thresh", 0644, d_tracer,
6274                         &tracing_thresh, &tracing_max_lat_fops);
6275
6276         trace_create_file("README", 0444, d_tracer,
6277                         NULL, &tracing_readme_fops);
6278
6279         trace_create_file("saved_cmdlines", 0444, d_tracer,
6280                         NULL, &tracing_saved_cmdlines_fops);
6281
6282 #ifdef CONFIG_DYNAMIC_FTRACE
6283         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6284                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6285 #endif
6286
6287         create_trace_instances(d_tracer);
6288
6289         create_trace_options_dir(&global_trace);
6290
6291         return 0;
6292 }
6293
6294 static int trace_panic_handler(struct notifier_block *this,
6295                                unsigned long event, void *unused)
6296 {
6297         if (ftrace_dump_on_oops)
6298                 ftrace_dump(ftrace_dump_on_oops);
6299         return NOTIFY_OK;
6300 }
6301
6302 static struct notifier_block trace_panic_notifier = {
6303         .notifier_call  = trace_panic_handler,
6304         .next           = NULL,
6305         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6306 };
6307
6308 static int trace_die_handler(struct notifier_block *self,
6309                              unsigned long val,
6310                              void *data)
6311 {
6312         switch (val) {
6313         case DIE_OOPS:
6314                 if (ftrace_dump_on_oops)
6315                         ftrace_dump(ftrace_dump_on_oops);
6316                 break;
6317         default:
6318                 break;
6319         }
6320         return NOTIFY_OK;
6321 }
6322
6323 static struct notifier_block trace_die_notifier = {
6324         .notifier_call = trace_die_handler,
6325         .priority = 200
6326 };
6327
6328 /*
6329  * printk is limited to a max of 1024 characters; we really don't
6330  * need it that big. Nothing should be printing 1000 characters anyway.
6331  */
6332 #define TRACE_MAX_PRINT         1000
6333
6334 /*
6335  * Define here KERN_TRACE so that we have one place to modify
6336  * it if we decide to change what log level the ftrace dump
6337  * should be at.
6338  */
6339 #define KERN_TRACE              KERN_EMERG
6340
6341 void
6342 trace_printk_seq(struct trace_seq *s)
6343 {
6344         /* Probably should print a warning here. */
6345         if (s->len >= TRACE_MAX_PRINT)
6346                 s->len = TRACE_MAX_PRINT;
6347
6348         /* Should already be nul-terminated, but we are paranoid. */
6349         s->buffer[s->len] = 0;
6350
6351         printk(KERN_TRACE "%s", s->buffer);
6352
6353         trace_seq_init(s);
6354 }
6355
6356 void trace_init_global_iter(struct trace_iterator *iter)
6357 {
6358         iter->tr = &global_trace;
6359         iter->trace = iter->tr->current_trace;
6360         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6361         iter->trace_buffer = &global_trace.trace_buffer;
6362
6363         if (iter->trace && iter->trace->open)
6364                 iter->trace->open(iter);
6365
6366         /* Annotate start of buffers if we had overruns */
6367         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6368                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6369
6370         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6371         if (trace_clocks[iter->tr->clock_id].in_ns)
6372                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6373 }
6374
6375 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6376 {
6377         /* use static because iter can be a bit big for the stack */
6378         static struct trace_iterator iter;
6379         static atomic_t dump_running;
6380         unsigned int old_userobj;
6381         unsigned long flags;
6382         int cnt = 0, cpu;
6383
6384         /* Only allow one dump user at a time. */
6385         if (atomic_inc_return(&dump_running) != 1) {
6386                 atomic_dec(&dump_running);
6387                 return;
6388         }
6389
6390         /*
6391          * Always turn off tracing when we dump.
6392          * We don't need to show trace output of what happens
6393          * between multiple crashes.
6394          *
6395          * If the user does a sysrq-z, then they can re-enable
6396          * tracing with echo 1 > tracing_on.
6397          */
6398         tracing_off();
6399
6400         local_irq_save(flags);
6401
6402         /* Simulate the iterator */
6403         trace_init_global_iter(&iter);
6404
6405         for_each_tracing_cpu(cpu) {
6406                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6407         }
6408
6409         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6410
6411         /* don't look at user memory in panic mode */
6412         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6413
6414         switch (oops_dump_mode) {
6415         case DUMP_ALL:
6416                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6417                 break;
6418         case DUMP_ORIG:
6419                 iter.cpu_file = raw_smp_processor_id();
6420                 break;
6421         case DUMP_NONE:
6422                 goto out_enable;
6423         default:
6424                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6425                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6426         }
6427
6428         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6429
6430         /* Did function tracer already get disabled? */
6431         if (ftrace_is_dead()) {
6432                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6433                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6434         }
6435
6436         /*
6437          * We need to stop all tracing on all CPUs to read
6438          * the next buffer. This is a bit expensive, but is
6439          * not done often. We fill in all that we can read,
6440          * and then release the locks again.
6441          */
6442
6443         while (!trace_empty(&iter)) {
6444
6445                 if (!cnt)
6446                         printk(KERN_TRACE "---------------------------------\n");
6447
6448                 cnt++;
6449
6450                 /* reset all but tr, trace, and overruns */
6451                 memset(&iter.seq, 0,
6452                        sizeof(struct trace_iterator) -
6453                        offsetof(struct trace_iterator, seq));
6454                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6455                 iter.pos = -1;
6456
6457                 if (trace_find_next_entry_inc(&iter) != NULL) {
6458                         int ret;
6459
6460                         ret = print_trace_line(&iter);
6461                         if (ret != TRACE_TYPE_NO_CONSUME)
6462                                 trace_consume(&iter);
6463                 }
6464                 touch_nmi_watchdog();
6465
6466                 trace_printk_seq(&iter.seq);
6467         }
6468
6469         if (!cnt)
6470                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6471         else
6472                 printk(KERN_TRACE "---------------------------------\n");
6473
6474  out_enable:
6475         trace_flags |= old_userobj;
6476
6477         for_each_tracing_cpu(cpu) {
6478                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6479         }
6480         atomic_dec(&dump_running);
6481         local_irq_restore(flags);
6482 }
6483 EXPORT_SYMBOL_GPL(ftrace_dump);
6484
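     /*
      * Allocate the global trace buffers and bootstrap tracing at
      * early_initcall time: set up the CPU masks, register the nop
      * tracer as the initial current_trace, and hook the panic and
      * die notifiers so the buffers can be dumped after a crash.
      */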
6485 __init static int tracer_alloc_buffers(void)
6486 {
6487         int ring_buf_size;
6488         int ret = -ENOMEM;
6489
6490
6491         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6492                 goto out;
6493
6494         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6495                 goto out_free_buffer_mask;
6496
6497         /* Only allocate trace_printk buffers if trace_printk() is used in the kernel */
6498         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6499                 /* Must be called before global_trace's buffers are allocated */
6500                 trace_printk_init_buffers();
6501
6502         /* To save memory, keep the ring buffer size to its minimum */
6503         if (ring_buffer_expanded)
6504                 ring_buf_size = trace_buf_size;
6505         else
6506                 ring_buf_size = 1;
6507
6508         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6509         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6510
6511         raw_spin_lock_init(&global_trace.start_lock);
6512
6513         /* Used for event triggers */
6514         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6515         if (!temp_buffer)
6516                 goto out_free_cpumask;
6517
6518         /* TODO: make the number of buffers hot pluggable with CPUs */
6519         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6520                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6521                 WARN_ON(1);
6522                 goto out_free_temp_buffer;
6523         }
6524
6525         if (global_trace.buffer_disabled)
6526                 tracing_off();
6527
6528         trace_init_cmdlines();
6529
6530         /*
6531          * register_tracer() might reference current_trace, so it
6532          * needs to be set before we register anything. This is
6533          * just a bootstrap of current_trace anyway.
6534          */
6535         global_trace.current_trace = &nop_trace;
6536
6537         register_tracer(&nop_trace);
6538
6539         /* All seems OK, enable tracing */
6540         tracing_disabled = 0;
6541
6542         atomic_notifier_chain_register(&panic_notifier_list,
6543                                        &trace_panic_notifier);
6544
6545         register_die_notifier(&trace_die_notifier);
6546
6547         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6548
6549         INIT_LIST_HEAD(&global_trace.systems);
6550         INIT_LIST_HEAD(&global_trace.events);
6551         list_add(&global_trace.list, &ftrace_trace_arrays);
6552
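             /*
              * Apply any boot-time trace options, one comma-separated
              * option at a time.
              */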
6553         while (trace_boot_options) {
6554                 char *option;
6555
6556                 option = strsep(&trace_boot_options, ",");
6557                 trace_set_options(&global_trace, option);
6558         }
6559
6560         register_snapshot_cmd();
6561
6562         return 0;
6563
6564 out_free_temp_buffer:
6565         ring_buffer_free(temp_buffer);
6566 out_free_cpumask:
6567         free_percpu(global_trace.trace_buffer.data);
6568 #ifdef CONFIG_TRACER_MAX_TRACE
6569         free_percpu(global_trace.max_buffer.data);
6570 #endif
6571         free_cpumask_var(global_trace.tracing_cpumask);
6572 out_free_buffer_mask:
6573         free_cpumask_var(tracing_buffer_mask);
6574 out:
6575         return ret;
6576 }
6577
6578 __init static int clear_boot_tracer(void)
6579 {
6580         /*
6581          * The buffer holding the default boot tracer's name lives in
6582          * an init section and is freed after boot. This function runs
6583          * as a late_initcall; if the boot tracer was never registered,
6584          * clear the pointer so a later registration does not access
6585          * the buffer that is about to be freed.
6586          */
6587         if (!default_bootup_tracer)
6588                 return 0;
6589
6590         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6591                default_bootup_tracer);
6592         default_bootup_tracer = NULL;
6593
6594         return 0;
6595 }
6596
6597 early_initcall(tracer_alloc_buffers);
6598 fs_initcall(tracer_init_debugfs);
6599 late_initcall(clear_boot_tracer);