1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, but concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 and is cleared to zero only when the
91  * tracer initializes successfully; that is the only place that
92  * sets it back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
183
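/*
 * ns2usecs - convert nanoseconds to microseconds, rounding to nearest.
 * The "+ 500" before the divide rounds instead of truncating: for example,
 * 1499ns becomes 1us while 1500ns becomes 2us.
 */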
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptor of each page in memory is used to hold
196  * the linked list, by linking the lru item in the page descriptor
197  * to each of the pages in that CPU's buffer.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
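/*
 * trace_array_get - take a reference on a trace array
 *
 * Returns 0 and increments @this_tr->ref if @this_tr is still on the
 * ftrace_trace_arrays list, or -ENODEV if it is not. The reference is
 * dropped again with trace_array_put().
 */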
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - show whether global_trace is enabled
287  *
288  * Shows if the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled" so it can be checked in fast paths such
290  * as the irqsoff tracer, but it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on(), which is a
292  * little slower but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This default is purposely kept low (16384 entries of 88 bytes
311  * each), so that if a dump on oops happens there is not an enormous
312  * amount of output to wait for. This size is configurable at both
313  * boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a linked list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * Serialize access to the ring buffer.
329  *
330  * The ring buffer serializes readers, but that is only low-level
331  * protection. The validity of the events (returned by ring_buffer_peek()
332  * and friends) is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow other processes
335  * to consume these events concurrently:
336  *   A) the page holding the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the event producer.
339  *   B) the page holding the consumed events may become a page for
340  *      splice_read, and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-cpu
343  * ring buffers concurrently.
344  *
345  * These primitives don't distinguish read-only and read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
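/*
 * A sketch of the intended read-side pattern (the actual callers are the
 * tracing read and splice paths later in this file):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from that cpu's buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead of a cpu number locks out all
 * per-cpu readers at once.
 */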
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
408
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff), that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races where it gets disabled while we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457
458         alloc = sizeof(*entry) + size + 2; /* possible \n added */
459
460         local_save_flags(irq_flags);
461         buffer = global_trace.trace_buffer.buffer;
462         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
463                                           irq_flags, preempt_count());
464         if (!event)
465                 return 0;
466
467         entry = ring_buffer_event_data(event);
468         entry->ip = ip;
469
470         memcpy(&entry->buf, str, size);
471
472         /* Add a newline if necessary */
473         if (entry->buf[size - 1] != '\n') {
474                 entry->buf[size] = '\n';
475                 entry->buf[size + 1] = '\0';
476         } else
477                 entry->buf[size] = '\0';
478
479         __buffer_unlock_commit(buffer, event);
480
481         return size;
482 }
483 EXPORT_SYMBOL_GPL(__trace_puts);
484
485 /**
486  * __trace_bputs - write the pointer to a constant string into trace buffer
487  * @ip:    The address of the caller
488  * @str:   The constant string whose address is written into the buffer
489  */
490 int __trace_bputs(unsigned long ip, const char *str)
491 {
492         struct ring_buffer_event *event;
493         struct ring_buffer *buffer;
494         struct bputs_entry *entry;
495         unsigned long irq_flags;
496         int size = sizeof(struct bputs_entry);
497
498         local_save_flags(irq_flags);
499         buffer = global_trace.trace_buffer.buffer;
500         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
501                                           irq_flags, preempt_count());
502         if (!event)
503                 return 0;
504
505         entry = ring_buffer_event_data(event);
506         entry->ip                       = ip;
507         entry->str                      = str;
508
509         __buffer_unlock_commit(buffer, event);
510
511         return 1;
512 }
513 EXPORT_SYMBOL_GPL(__trace_bputs);
514
515 #ifdef CONFIG_TRACER_SNAPSHOT
516 /**
517  * tracing_snapshot - take a snapshot of the current buffer.
518  *
519  * This causes a swap between the snapshot buffer and the current live
520  * tracing buffer. You can use this to take snapshots of the live
521  * trace when some condition is triggered, but continue to trace.
522  *
523  * Note, make sure to allocate the snapshot with either
524  * tracing_snapshot_alloc(), or by doing it manually
525  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
526  *
527  * If the snapshot buffer is not allocated, tracing will be stopped,
528  * basically making a permanent snapshot.
529  */
530 void tracing_snapshot(void)
531 {
532         struct trace_array *tr = &global_trace;
533         struct tracer *tracer = tr->current_trace;
534         unsigned long flags;
535
536         if (in_nmi()) {
537                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
538                 internal_trace_puts("*** snapshot is being ignored        ***\n");
539                 return;
540         }
541
542         if (!tr->allocated_snapshot) {
543                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
544                 internal_trace_puts("*** stopping trace here!   ***\n");
545                 tracing_off();
546                 return;
547         }
548
549         /* Note, the snapshot cannot be used while the tracer itself uses it */
550         if (tracer->use_max_tr) {
551                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
552                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
553                 return;
554         }
555
556         local_irq_save(flags);
557         update_max_tr(tr, current, smp_processor_id());
558         local_irq_restore(flags);
559 }
560 EXPORT_SYMBOL_GPL(tracing_snapshot);
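/*
 * A minimal usage sketch (the condition check is hypothetical; it assumes
 * the snapshot buffer was allocated beforehand, e.g. with
 * tracing_snapshot_alloc()):
 *
 *	if (rare_condition_hit())
 *		tracing_snapshot();
 *
 * The live buffer keeps tracing afterwards; the swapped-out copy can be
 * read from the "snapshot" file in the tracing directory.
 */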
561
562 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
563                                         struct trace_buffer *size_buf, int cpu_id);
564 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
565
566 static int alloc_snapshot(struct trace_array *tr)
567 {
568         int ret;
569
570         if (!tr->allocated_snapshot) {
571
572                 /* allocate spare buffer */
573                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
574                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
575                 if (ret < 0)
576                         return ret;
577
578                 tr->allocated_snapshot = true;
579         }
580
581         return 0;
582 }
583
584 void free_snapshot(struct trace_array *tr)
585 {
586         /*
587          * We don't free the ring buffer; instead, we resize it because
588          * the max_tr ring buffer has some state (e.g. ring->clock) that
589          * we want to preserve.
590          */
591         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
592         set_buffer_entries(&tr->max_buffer, 1);
593         tracing_reset_online_cpus(&tr->max_buffer);
594         tr->allocated_snapshot = false;
595 }
596
597 /**
598  * tracing_alloc_snapshot - allocate snapshot buffer.
599  *
600  * This only allocates the snapshot buffer if it isn't already
601  * allocated - it doesn't also take a snapshot.
602  *
603  * This is meant to be used in cases where the snapshot buffer needs
604  * to be set up for events that can't sleep but need to be able to
605  * trigger a snapshot.
606  */
607 int tracing_alloc_snapshot(void)
608 {
609         struct trace_array *tr = &global_trace;
610         int ret;
611
612         ret = alloc_snapshot(tr);
613         WARN_ON(ret < 0);
614
615         return ret;
616 }
617 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
618
619 /**
620  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
621  *
622  * This is similar to tracing_snapshot(), but it will allocate the
623  * snapshot buffer if it isn't already allocated. Use this only
624  * where it is safe to sleep, as the allocation may sleep.
625  *
626  * This causes a swap between the snapshot buffer and the current live
627  * tracing buffer. You can use this to take snapshots of the live
628  * trace when some condition is triggered, but continue to trace.
629  */
630 void tracing_snapshot_alloc(void)
631 {
632         int ret;
633
634         ret = tracing_alloc_snapshot();
635         if (ret < 0)
636                 return;
637
638         tracing_snapshot();
639 }
640 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
641 #else
642 void tracing_snapshot(void)
643 {
644         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
645 }
646 EXPORT_SYMBOL_GPL(tracing_snapshot);
647 int tracing_alloc_snapshot(void)
648 {
649         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
650         return -ENODEV;
651 }
652 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
653 void tracing_snapshot_alloc(void)
654 {
655         /* Give warning */
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #endif /* CONFIG_TRACER_SNAPSHOT */
660
661 static void tracer_tracing_off(struct trace_array *tr)
662 {
663         if (tr->trace_buffer.buffer)
664                 ring_buffer_record_off(tr->trace_buffer.buffer);
665         /*
666          * This flag is looked at when buffers haven't been allocated
667          * yet, or by some tracers (like irqsoff), that just want to
668          * know if the ring buffer has been disabled, but it can handle
669          * races where it gets disabled while we still do a record.
670          * As the check is in the fast path of the tracers, it is more
671          * important to be fast than accurate.
672          */
673         tr->buffer_disabled = 1;
674         /* Make the flag seen by readers */
675         smp_wmb();
676 }
677
678 /**
679  * tracing_off - turn off tracing buffers
680  *
681  * This function stops the tracing buffers from recording data.
682  * It does not disable any overhead the tracers themselves may
683  * be causing. This function simply causes all recording to
684  * the ring buffers to fail.
685  */
686 void tracing_off(void)
687 {
688         tracer_tracing_off(&global_trace);
689 }
690 EXPORT_SYMBOL_GPL(tracing_off);
691
692 void disable_trace_on_warning(void)
693 {
694         if (__disable_trace_on_warning)
695                 tracing_off();
696 }
697
698 /**
699  * tracer_tracing_is_on - show real state of ring buffer enabled
700  * @tr: the trace array to check for ring buffer enablement
701  *
702  * Shows the real state of the ring buffer: whether it is enabled or not.
703  */
704 static int tracer_tracing_is_on(struct trace_array *tr)
705 {
706         if (tr->trace_buffer.buffer)
707                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
708         return !tr->buffer_disabled;
709 }
710
711 /**
712  * tracing_is_on - show whether the ring buffers are enabled
713  */
714 int tracing_is_on(void)
715 {
716         return tracer_tracing_is_on(&global_trace);
717 }
718 EXPORT_SYMBOL_GPL(tracing_is_on);
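/*
 * Sketch of how tracing_on()/tracing_off() are meant to be paired by a
 * hypothetical caller; they only gate recording into the ring buffers and
 * do not remove any tracer overhead:
 *
 *	tracing_on();
 *	do_interesting_work();
 *	if (detected_problem())
 *		tracing_off();
 *
 * After tracing_off() the buffer contents stay frozen for post-mortem
 * reading through the "trace" file.
 */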
719
720 static int __init set_buf_size(char *str)
721 {
722         unsigned long buf_size;
723
724         if (!str)
725                 return 0;
726         buf_size = memparse(str, &str);
727         /* nr_entries can not be zero */
728         if (buf_size == 0)
729                 return 0;
730         trace_buf_size = buf_size;
731         return 1;
732 }
733 __setup("trace_buf_size=", set_buf_size);
734
735 static int __init set_tracing_thresh(char *str)
736 {
737         unsigned long threshold;
738         int ret;
739
740         if (!str)
741                 return 0;
742         ret = kstrtoul(str, 0, &threshold);
743         if (ret < 0)
744                 return 0;
745         tracing_thresh = threshold * 1000;
746         return 1;
747 }
748 __setup("tracing_thresh=", set_tracing_thresh);
749
750 unsigned long nsecs_to_usecs(unsigned long nsecs)
751 {
752         return nsecs / 1000;
753 }
754
755 /* These must match the bit positions in trace_iterator_flags */
756 static const char *trace_options[] = {
757         "print-parent",
758         "sym-offset",
759         "sym-addr",
760         "verbose",
761         "raw",
762         "hex",
763         "bin",
764         "block",
765         "stacktrace",
766         "trace_printk",
767         "ftrace_preempt",
768         "branch",
769         "annotate",
770         "userstacktrace",
771         "sym-userobj",
772         "printk-msg-only",
773         "context-info",
774         "latency-format",
775         "sleep-time",
776         "graph-time",
777         "record-cmd",
778         "overwrite",
779         "disable_on_free",
780         "irq-info",
781         "markers",
782         "function-trace",
783         NULL
784 };
785
786 static struct {
787         u64 (*func)(void);
788         const char *name;
789         int in_ns;              /* is this clock in nanoseconds? */
790 } trace_clocks[] = {
791         { trace_clock_local,    "local",        1 },
792         { trace_clock_global,   "global",       1 },
793         { trace_clock_counter,  "counter",      0 },
794         { trace_clock_jiffies,  "uptime",       1 },
795         { trace_clock,          "perf",         1 },
796         ARCH_TRACE_CLOCKS
797 };
798
799 /*
800  * trace_parser_get_init - gets the buffer for trace parser
801  */
802 int trace_parser_get_init(struct trace_parser *parser, int size)
803 {
804         memset(parser, 0, sizeof(*parser));
805
806         parser->buffer = kmalloc(size, GFP_KERNEL);
807         if (!parser->buffer)
808                 return 1;
809
810         parser->size = size;
811         return 0;
812 }
813
814 /*
815  * trace_parser_put - frees the buffer for trace parser
816  */
817 void trace_parser_put(struct trace_parser *parser)
818 {
819         kfree(parser->buffer);
820 }
821
822 /*
823  * trace_get_user - reads the user input string separated by space
824  * (matched by isspace(ch))
825  *
826  * For each string found the 'struct trace_parser' is updated,
827  * and the function returns.
828  *
829  * Returns number of bytes read.
830  *
831  * See kernel/trace/trace.h for 'struct trace_parser' details.
832  */
833 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
834         size_t cnt, loff_t *ppos)
835 {
836         char ch;
837         size_t read = 0;
838         ssize_t ret;
839
840         if (!*ppos)
841                 trace_parser_clear(parser);
842
843         ret = get_user(ch, ubuf++);
844         if (ret)
845                 goto out;
846
847         read++;
848         cnt--;
849
850         /*
851          * If the parser finished with the last write, skip leading white
852          * space; otherwise continue reading the input without skipping spaces.
853          */
854         if (!parser->cont) {
855                 /* skip white space */
856                 while (cnt && isspace(ch)) {
857                         ret = get_user(ch, ubuf++);
858                         if (ret)
859                                 goto out;
860                         read++;
861                         cnt--;
862                 }
863
864                 /* only spaces were written */
865                 if (isspace(ch)) {
866                         *ppos += read;
867                         ret = read;
868                         goto out;
869                 }
870
871                 parser->idx = 0;
872         }
873
874         /* read the non-space input */
875         while (cnt && !isspace(ch)) {
876                 if (parser->idx < parser->size - 1)
877                         parser->buffer[parser->idx++] = ch;
878                 else {
879                         ret = -EINVAL;
880                         goto out;
881                 }
882                 ret = get_user(ch, ubuf++);
883                 if (ret)
884                         goto out;
885                 read++;
886                 cnt--;
887         }
888
889         /* We either got finished input or we have to wait for another call. */
890         if (isspace(ch)) {
891                 parser->buffer[parser->idx] = 0;
892                 parser->cont = false;
893         } else if (parser->idx < parser->size - 1) {
894                 parser->cont = true;
895                 parser->buffer[parser->idx++] = ch;
896         } else {
897                 ret = -EINVAL;
898                 goto out;
899         }
900
901         *ppos += read;
902         ret = read;
903
904 out:
905         return ret;
906 }
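/*
 * A hedged sketch of how a write() handler typically drives the parser;
 * do_something_with() is hypothetical, but real users such as the ftrace
 * filter files follow this shape:
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		do_something_with(parser.buffer);
 *	trace_parser_put(&parser);
 *	return read;
 */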
907
908 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
909 {
910         int len;
911         int ret;
912
913         if (!cnt)
914                 return 0;
915
916         if (s->len <= s->readpos)
917                 return -EBUSY;
918
919         len = s->len - s->readpos;
920         if (cnt > len)
921                 cnt = len;
922         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
923         if (ret == cnt)
924                 return -EFAULT;
925
926         cnt -= ret;
927
928         s->readpos += cnt;
929         return cnt;
930 }
931
932 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
933 {
934         int len;
935
936         if (s->len <= s->readpos)
937                 return -EBUSY;
938
939         len = s->len - s->readpos;
940         if (cnt > len)
941                 cnt = len;
942         memcpy(buf, s->buffer + s->readpos, cnt);
943
944         s->readpos += cnt;
945         return cnt;
946 }
947
948 /*
949  * ftrace_max_lock is used to protect the swapping of buffers
950  * when taking a max snapshot. The buffers themselves are
951  * protected by per_cpu spinlocks. But the action of the swap
952  * needs its own lock.
953  *
954  * This is defined as an arch_spinlock_t in order to help
955  * with performance when lockdep debugging is enabled.
956  *
957  * It is also used in places other than update_max_tr(), so it
958  * needs to be defined outside of CONFIG_TRACER_MAX_TRACE.
960  */
961 static arch_spinlock_t ftrace_max_lock =
962         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
963
964 unsigned long __read_mostly     tracing_thresh;
965
966 #ifdef CONFIG_TRACER_MAX_TRACE
967 unsigned long __read_mostly     tracing_max_latency;
968
969 /*
970  * Copy the new maximum trace into the separate maximum-trace
971  * structure. (this way the maximum trace is permanently saved,
972  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
973  */
974 static void
975 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
976 {
977         struct trace_buffer *trace_buf = &tr->trace_buffer;
978         struct trace_buffer *max_buf = &tr->max_buffer;
979         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
980         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
981
982         max_buf->cpu = cpu;
983         max_buf->time_start = data->preempt_timestamp;
984
985         max_data->saved_latency = tracing_max_latency;
986         max_data->critical_start = data->critical_start;
987         max_data->critical_end = data->critical_end;
988
989         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
990         max_data->pid = tsk->pid;
991         /*
992          * If tsk == current, then use current_uid(), as that does not use
993          * RCU. The irq tracer can be called out of RCU scope.
994          */
995         if (tsk == current)
996                 max_data->uid = current_uid();
997         else
998                 max_data->uid = task_uid(tsk);
999
1000         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1001         max_data->policy = tsk->policy;
1002         max_data->rt_priority = tsk->rt_priority;
1003
1004         /* record this task's comm */
1005         tracing_record_cmdline(tsk);
1006 }
1007
1008 /**
1009  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1010  * @tr: tracer
1011  * @tsk: the task with the latency
1012  * @cpu: The cpu that initiated the trace.
1013  *
1014  * Flip the buffers between the @tr and the max_tr and record information
1015  * about which task was the cause of this latency.
1016  */
1017 void
1018 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1019 {
1020         struct ring_buffer *buf;
1021
1022         if (tr->stop_count)
1023                 return;
1024
1025         WARN_ON_ONCE(!irqs_disabled());
1026
1027         if (!tr->allocated_snapshot) {
1028                 /* Only the nop tracer should hit this when disabling */
1029                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1030                 return;
1031         }
1032
1033         arch_spin_lock(&ftrace_max_lock);
1034
1035         buf = tr->trace_buffer.buffer;
1036         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1037         tr->max_buffer.buffer = buf;
1038
1039         __update_max_tr(tr, tsk, cpu);
1040         arch_spin_unlock(&ftrace_max_lock);
1041 }
1042
1043 /**
1044  * update_max_tr_single - only copy one trace over, and reset the rest
1045  * @tr: tracer
1046  * @tsk: task with the latency
1047  * @cpu: the cpu of the buffer to copy.
1048  *
1049  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1050  */
1051 void
1052 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1053 {
1054         int ret;
1055
1056         if (tr->stop_count)
1057                 return;
1058
1059         WARN_ON_ONCE(!irqs_disabled());
1060         if (!tr->allocated_snapshot) {
1061                 /* Only the nop tracer should hit this when disabling */
1062                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1063                 return;
1064         }
1065
1066         arch_spin_lock(&ftrace_max_lock);
1067
1068         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1069
1070         if (ret == -EBUSY) {
1071                 /*
1072                  * We failed to swap the buffer due to a commit taking
1073                  * place on this CPU. We fail to record, but we reset
1074                  * the max trace buffer (no one writes directly to it)
1075                  * and flag that it failed.
1076                  */
1077                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1078                         "Failed to swap buffers due to commit in progress\n");
1079         }
1080
1081         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1082
1083         __update_max_tr(tr, tsk, cpu);
1084         arch_spin_unlock(&ftrace_max_lock);
1085 }
1086 #endif /* CONFIG_TRACER_MAX_TRACE */
1087
1088 static void default_wait_pipe(struct trace_iterator *iter)
1089 {
1090         /* Iterators are static, they should be filled or empty */
1091         if (trace_buffer_iter(iter, iter->cpu_file))
1092                 return;
1093
1094         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1095 }
1096
1097 #ifdef CONFIG_FTRACE_STARTUP_TEST
1098 static int run_tracer_selftest(struct tracer *type)
1099 {
1100         struct trace_array *tr = &global_trace;
1101         struct tracer *saved_tracer = tr->current_trace;
1102         int ret;
1103
1104         if (!type->selftest || tracing_selftest_disabled)
1105                 return 0;
1106
1107         /*
1108          * Run a selftest on this tracer.
1109          * Here we reset the trace buffer, and set the current
1110          * tracer to be this tracer. The tracer can then run some
1111          * internal tracing to verify that everything is in order.
1112          * If we fail, we do not register this tracer.
1113          */
1114         tracing_reset_online_cpus(&tr->trace_buffer);
1115
1116         tr->current_trace = type;
1117
1118 #ifdef CONFIG_TRACER_MAX_TRACE
1119         if (type->use_max_tr) {
1120                 /* If we expanded the buffers, make sure the max is expanded too */
1121                 if (ring_buffer_expanded)
1122                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1123                                            RING_BUFFER_ALL_CPUS);
1124                 tr->allocated_snapshot = true;
1125         }
1126 #endif
1127
1128         /* the test is responsible for initializing and enabling */
1129         pr_info("Testing tracer %s: ", type->name);
1130         ret = type->selftest(type, tr);
1131         /* the test is responsible for resetting too */
1132         tr->current_trace = saved_tracer;
1133         if (ret) {
1134                 printk(KERN_CONT "FAILED!\n");
1135                 /* Add the warning after printing 'FAILED' */
1136                 WARN_ON(1);
1137                 return -1;
1138         }
1139         /* Only reset on passing, to avoid touching corrupted buffers */
1140         tracing_reset_online_cpus(&tr->trace_buffer);
1141
1142 #ifdef CONFIG_TRACER_MAX_TRACE
1143         if (type->use_max_tr) {
1144                 tr->allocated_snapshot = false;
1145
1146                 /* Shrink the max buffer again */
1147                 if (ring_buffer_expanded)
1148                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1149                                            RING_BUFFER_ALL_CPUS);
1150         }
1151 #endif
1152
1153         printk(KERN_CONT "PASSED\n");
1154         return 0;
1155 }
1156 #else
1157 static inline int run_tracer_selftest(struct tracer *type)
1158 {
1159         return 0;
1160 }
1161 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1162
1163 /**
1164  * register_tracer - register a tracer with the ftrace system.
1165  * @type: the plugin for the tracer
1166  *
1167  * Register a new plugin tracer.
1168  */
1169 int register_tracer(struct tracer *type)
1170 {
1171         struct tracer *t;
1172         int ret = 0;
1173
1174         if (!type->name) {
1175                 pr_info("Tracer must have a name\n");
1176                 return -1;
1177         }
1178
1179         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1180                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1181                 return -1;
1182         }
1183
1184         mutex_lock(&trace_types_lock);
1185
1186         tracing_selftest_running = true;
1187
1188         for (t = trace_types; t; t = t->next) {
1189                 if (strcmp(type->name, t->name) == 0) {
1190                         /* already found */
1191                         pr_info("Tracer %s already registered\n",
1192                                 type->name);
1193                         ret = -1;
1194                         goto out;
1195                 }
1196         }
1197
1198         if (!type->set_flag)
1199                 type->set_flag = &dummy_set_flag;
1200         if (!type->flags)
1201                 type->flags = &dummy_tracer_flags;
1202         else
1203                 if (!type->flags->opts)
1204                         type->flags->opts = dummy_tracer_opt;
1205         if (!type->wait_pipe)
1206                 type->wait_pipe = default_wait_pipe;
1207
1208         ret = run_tracer_selftest(type);
1209         if (ret < 0)
1210                 goto out;
1211
1212         type->next = trace_types;
1213         trace_types = type;
1214
1215  out:
1216         tracing_selftest_running = false;
1217         mutex_unlock(&trace_types_lock);
1218
1219         if (ret || !default_bootup_tracer)
1220                 goto out_unlock;
1221
1222         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1223                 goto out_unlock;
1224
1225         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1226         /* Do we want this tracer to start on bootup? */
1227         tracing_set_tracer(type->name);
1228         default_bootup_tracer = NULL;
1229         /* disable other selftests, since this will break them. */
1230         tracing_selftest_disabled = true;
1231 #ifdef CONFIG_FTRACE_STARTUP_TEST
1232         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1233                type->name);
1234 #endif
1235
1236  out_unlock:
1237         return ret;
1238 }
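/*
 * A minimal, hypothetical example of registering a tracer (a real tracer
 * would normally also provide .init/.reset callbacks and possibly a
 * selftest):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *	};
 *
 *	static int __init init_example_trace(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_trace);
 */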
1239
1240 void tracing_reset(struct trace_buffer *buf, int cpu)
1241 {
1242         struct ring_buffer *buffer = buf->buffer;
1243
1244         if (!buffer)
1245                 return;
1246
1247         ring_buffer_record_disable(buffer);
1248
1249         /* Make sure all commits have finished */
1250         synchronize_sched();
1251         ring_buffer_reset_cpu(buffer, cpu);
1252
1253         ring_buffer_record_enable(buffer);
1254 }
1255
1256 void tracing_reset_online_cpus(struct trace_buffer *buf)
1257 {
1258         struct ring_buffer *buffer = buf->buffer;
1259         int cpu;
1260
1261         if (!buffer)
1262                 return;
1263
1264         ring_buffer_record_disable(buffer);
1265
1266         /* Make sure all commits have finished */
1267         synchronize_sched();
1268
1269         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1270
1271         for_each_online_cpu(cpu)
1272                 ring_buffer_reset_cpu(buffer, cpu);
1273
1274         ring_buffer_record_enable(buffer);
1275 }
1276
1277 /* Must have trace_types_lock held */
1278 void tracing_reset_all_online_cpus(void)
1279 {
1280         struct trace_array *tr;
1281
1282         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1283                 tracing_reset_online_cpus(&tr->trace_buffer);
1284 #ifdef CONFIG_TRACER_MAX_TRACE
1285                 tracing_reset_online_cpus(&tr->max_buffer);
1286 #endif
1287         }
1288 }
1289
1290 #define SAVED_CMDLINES 128
1291 #define NO_CMDLINE_MAP UINT_MAX
1292 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1293 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1294 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1295 static int cmdline_idx;
1296 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
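/*
 * The arrays above form a small pid <-> comm cache: map_pid_to_cmdline[]
 * points a pid at one of the SAVED_CMDLINES slots in saved_cmdlines[],
 * and map_cmdline_to_pid[] remembers which pid currently owns each slot
 * so that the stale mapping can be invalidated when a slot is recycled
 * (see trace_save_cmdline() below).
 */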
1297
1298 /* temporarily disable recording */
1299 static atomic_t trace_record_cmdline_disabled __read_mostly;
1300
1301 static void trace_init_cmdlines(void)
1302 {
1303         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1304         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1305         cmdline_idx = 0;
1306 }
1307
1308 int is_tracing_stopped(void)
1309 {
1310         return global_trace.stop_count;
1311 }
1312
1313 /**
1314  * tracing_start - quick start of the tracer
1315  *
1316  * If tracing is enabled but was stopped by tracing_stop,
1317  * this will start the tracer back up.
1318  */
1319 void tracing_start(void)
1320 {
1321         struct ring_buffer *buffer;
1322         unsigned long flags;
1323
1324         if (tracing_disabled)
1325                 return;
1326
1327         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1328         if (--global_trace.stop_count) {
1329                 if (global_trace.stop_count < 0) {
1330                         /* Someone screwed up their debugging */
1331                         WARN_ON_ONCE(1);
1332                         global_trace.stop_count = 0;
1333                 }
1334                 goto out;
1335         }
1336
1337         /* Prevent the buffers from switching */
1338         arch_spin_lock(&ftrace_max_lock);
1339
1340         buffer = global_trace.trace_buffer.buffer;
1341         if (buffer)
1342                 ring_buffer_record_enable(buffer);
1343
1344 #ifdef CONFIG_TRACER_MAX_TRACE
1345         buffer = global_trace.max_buffer.buffer;
1346         if (buffer)
1347                 ring_buffer_record_enable(buffer);
1348 #endif
1349
1350         arch_spin_unlock(&ftrace_max_lock);
1351
1352         ftrace_start();
1353  out:
1354         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1355 }
1356
1357 static void tracing_start_tr(struct trace_array *tr)
1358 {
1359         struct ring_buffer *buffer;
1360         unsigned long flags;
1361
1362         if (tracing_disabled)
1363                 return;
1364
1365         /* If global, we need to also start the max tracer */
1366         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1367                 return tracing_start();
1368
1369         raw_spin_lock_irqsave(&tr->start_lock, flags);
1370
1371         if (--tr->stop_count) {
1372                 if (tr->stop_count < 0) {
1373                         /* Someone screwed up their debugging */
1374                         WARN_ON_ONCE(1);
1375                         tr->stop_count = 0;
1376                 }
1377                 goto out;
1378         }
1379
1380         buffer = tr->trace_buffer.buffer;
1381         if (buffer)
1382                 ring_buffer_record_enable(buffer);
1383
1384  out:
1385         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1386 }
1387
1388 /**
1389  * tracing_stop - quick stop of the tracer
1390  *
1391  * Light weight way to stop tracing. Use in conjunction with
1392  * tracing_start.
1393  */
1394 void tracing_stop(void)
1395 {
1396         struct ring_buffer *buffer;
1397         unsigned long flags;
1398
1399         ftrace_stop();
1400         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1401         if (global_trace.stop_count++)
1402                 goto out;
1403
1404         /* Prevent the buffers from switching */
1405         arch_spin_lock(&ftrace_max_lock);
1406
1407         buffer = global_trace.trace_buffer.buffer;
1408         if (buffer)
1409                 ring_buffer_record_disable(buffer);
1410
1411 #ifdef CONFIG_TRACER_MAX_TRACE
1412         buffer = global_trace.max_buffer.buffer;
1413         if (buffer)
1414                 ring_buffer_record_disable(buffer);
1415 #endif
1416
1417         arch_spin_unlock(&ftrace_max_lock);
1418
1419  out:
1420         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1421 }
1422
1423 static void tracing_stop_tr(struct trace_array *tr)
1424 {
1425         struct ring_buffer *buffer;
1426         unsigned long flags;
1427
1428         /* If global, we need to also stop the max tracer */
1429         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1430                 return tracing_stop();
1431
1432         raw_spin_lock_irqsave(&tr->start_lock, flags);
1433         if (tr->stop_count++)
1434                 goto out;
1435
1436         buffer = tr->trace_buffer.buffer;
1437         if (buffer)
1438                 ring_buffer_record_disable(buffer);
1439
1440  out:
1441         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1442 }
1443
1444 void trace_stop_cmdline_recording(void);
1445
1446 static void trace_save_cmdline(struct task_struct *tsk)
1447 {
1448         unsigned pid, idx;
1449
1450         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1451                 return;
1452
1453         /*
1454          * It's not the end of the world if we don't get
1455          * the lock, but we also don't want to spin
1456          * nor do we want to disable interrupts,
1457          * so if we miss here, then better luck next time.
1458          */
1459         if (!arch_spin_trylock(&trace_cmdline_lock))
1460                 return;
1461
1462         idx = map_pid_to_cmdline[tsk->pid];
1463         if (idx == NO_CMDLINE_MAP) {
1464                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1465
1466                 /*
1467                  * Check whether the cmdline buffer at idx has a pid
1468                  * mapped. We are going to overwrite that entry so we
1469                  * need to clear the map_pid_to_cmdline. Otherwise we
1470                  * would read the new comm for the old pid.
1471                  */
1472                 pid = map_cmdline_to_pid[idx];
1473                 if (pid != NO_CMDLINE_MAP)
1474                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1475
1476                 map_cmdline_to_pid[idx] = tsk->pid;
1477                 map_pid_to_cmdline[tsk->pid] = idx;
1478
1479                 cmdline_idx = idx;
1480         }
1481
1482         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1483
1484         arch_spin_unlock(&trace_cmdline_lock);
1485 }
1486
1487 void trace_find_cmdline(int pid, char comm[])
1488 {
1489         unsigned map;
1490
1491         if (!pid) {
1492                 strcpy(comm, "<idle>");
1493                 return;
1494         }
1495
1496         if (WARN_ON_ONCE(pid < 0)) {
1497                 strcpy(comm, "<XXX>");
1498                 return;
1499         }
1500
1501         if (pid > PID_MAX_DEFAULT) {
1502                 strcpy(comm, "<...>");
1503                 return;
1504         }
1505
1506         preempt_disable();
1507         arch_spin_lock(&trace_cmdline_lock);
1508         map = map_pid_to_cmdline[pid];
1509         if (map != NO_CMDLINE_MAP)
1510                 strcpy(comm, saved_cmdlines[map]);
1511         else
1512                 strcpy(comm, "<...>");
1513
1514         arch_spin_unlock(&trace_cmdline_lock);
1515         preempt_enable();
1516 }
1517
1518 void tracing_record_cmdline(struct task_struct *tsk)
1519 {
1520         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1521                 return;
1522
1523         if (!__this_cpu_read(trace_cmdline_save))
1524                 return;
1525
1526         __this_cpu_write(trace_cmdline_save, false);
1527
1528         trace_save_cmdline(tsk);
1529 }
1530
1531 void
1532 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1533                              int pc)
1534 {
1535         struct task_struct *tsk = current;
1536
1537         entry->preempt_count            = pc & 0xff;
1538         entry->pid                      = (tsk) ? tsk->pid : 0;
1539         entry->flags =
1540 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1541                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1542 #else
1543                 TRACE_FLAG_IRQS_NOSUPPORT |
1544 #endif
1545                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1546                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1547                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1548                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1549 }
1550 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1551
1552 struct ring_buffer_event *
1553 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1554                           int type,
1555                           unsigned long len,
1556                           unsigned long flags, int pc)
1557 {
1558         struct ring_buffer_event *event;
1559
1560         event = ring_buffer_lock_reserve(buffer, len);
1561         if (event != NULL) {
1562                 struct trace_entry *ent = ring_buffer_event_data(event);
1563
1564                 tracing_generic_entry_update(ent, flags, pc);
1565                 ent->type = type;
1566         }
1567
1568         return event;
1569 }
1570
1571 void
1572 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1573 {
1574         __this_cpu_write(trace_cmdline_save, true);
1575         ring_buffer_unlock_commit(buffer, event);
1576 }
1577
1578 static inline void
1579 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1580                              struct ring_buffer_event *event,
1581                              unsigned long flags, int pc)
1582 {
1583         __buffer_unlock_commit(buffer, event);
1584
1585         ftrace_trace_stack(buffer, flags, 6, pc);
1586         ftrace_trace_userstack(buffer, flags, pc);
1587 }
1588
1589 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1590                                 struct ring_buffer_event *event,
1591                                 unsigned long flags, int pc)
1592 {
1593         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1594 }
1595 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1596
1597 struct ring_buffer_event *
1598 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1599                           struct ftrace_event_file *ftrace_file,
1600                           int type, unsigned long len,
1601                           unsigned long flags, int pc)
1602 {
1603         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1604         return trace_buffer_lock_reserve(*current_rb,
1605                                          type, len, flags, pc);
1606 }
1607 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1608
1609 struct ring_buffer_event *
1610 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1611                                   int type, unsigned long len,
1612                                   unsigned long flags, int pc)
1613 {
1614         *current_rb = global_trace.trace_buffer.buffer;
1615         return trace_buffer_lock_reserve(*current_rb,
1616                                          type, len, flags, pc);
1617 }
1618 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1619
1620 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1621                                         struct ring_buffer_event *event,
1622                                         unsigned long flags, int pc)
1623 {
1624         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1625 }
1626 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1627
1628 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1629                                      struct ring_buffer_event *event,
1630                                      unsigned long flags, int pc,
1631                                      struct pt_regs *regs)
1632 {
1633         __buffer_unlock_commit(buffer, event);
1634
1635         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1636         ftrace_trace_userstack(buffer, flags, pc);
1637 }
1638 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1639
1640 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1641                                          struct ring_buffer_event *event)
1642 {
1643         ring_buffer_discard_commit(buffer, event);
1644 }
1645 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1646
1647 void
1648 trace_function(struct trace_array *tr,
1649                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1650                int pc)
1651 {
1652         struct ftrace_event_call *call = &event_function;
1653         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1654         struct ring_buffer_event *event;
1655         struct ftrace_entry *entry;
1656
1657         /* If we are reading the ring buffer, don't trace */
1658         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1659                 return;
1660
1661         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1662                                           flags, pc);
1663         if (!event)
1664                 return;
1665         entry   = ring_buffer_event_data(event);
1666         entry->ip                       = ip;
1667         entry->parent_ip                = parent_ip;
1668
1669         if (!call_filter_check_discard(call, entry, buffer, event))
1670                 __buffer_unlock_commit(buffer, event);
1671 }
1672
1673 #ifdef CONFIG_STACKTRACE
1674
1675 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1676 struct ftrace_stack {
1677         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1678 };
1679
1680 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1681 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1682
1683 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1684                                  unsigned long flags,
1685                                  int skip, int pc, struct pt_regs *regs)
1686 {
1687         struct ftrace_event_call *call = &event_kernel_stack;
1688         struct ring_buffer_event *event;
1689         struct stack_entry *entry;
1690         struct stack_trace trace;
1691         int use_stack;
1692         int size = FTRACE_STACK_ENTRIES;
1693
1694         trace.nr_entries        = 0;
1695         trace.skip              = skip;
1696
1697         /*
1698          * Since events can happen in NMIs, there's no safe way to
1699          * lock the per-cpu ftrace_stack. We reserve it instead; if an
1700          * interrupt or NMI comes in while it is held, that caller falls
1701          * back to the default FTRACE_STACK_ENTRIES reserved in the event.
1702          */
1703         preempt_disable_notrace();
1704
1705         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1706         /*
1707          * We don't need any atomic variables, just a barrier.
1708          * If an interrupt comes in, we don't care, because it would
1709          * have exited and put the counter back to what we want.
1710          * We just need a barrier to keep gcc from moving things
1711          * around.
1712          */
1713         barrier();
1714         if (use_stack == 1) {
1715                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1716                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1717
1718                 if (regs)
1719                         save_stack_trace_regs(regs, &trace);
1720                 else
1721                         save_stack_trace(&trace);
1722
1723                 if (trace.nr_entries > size)
1724                         size = trace.nr_entries;
1725         } else
1726                 /* From now on, use_stack is a boolean */
1727                 use_stack = 0;
1728
1729         size *= sizeof(unsigned long);
1730
1731         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1732                                           sizeof(*entry) + size, flags, pc);
1733         if (!event)
1734                 goto out;
1735         entry = ring_buffer_event_data(event);
1736
1737         memset(&entry->caller, 0, size);
1738
1739         if (use_stack)
1740                 memcpy(&entry->caller, trace.entries,
1741                        trace.nr_entries * sizeof(unsigned long));
1742         else {
1743                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1744                 trace.entries           = entry->caller;
1745                 if (regs)
1746                         save_stack_trace_regs(regs, &trace);
1747                 else
1748                         save_stack_trace(&trace);
1749         }
1750
1751         entry->size = trace.nr_entries;
1752
1753         if (!call_filter_check_discard(call, entry, buffer, event))
1754                 __buffer_unlock_commit(buffer, event);
1755
1756  out:
1757         /* Again, don't let gcc optimize things here */
1758         barrier();
1759         __this_cpu_dec(ftrace_stack_reserve);
1760         preempt_enable_notrace();
1761
1762 }
1763
1764 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1765                              int skip, int pc, struct pt_regs *regs)
1766 {
1767         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1768                 return;
1769
1770         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1771 }
1772
1773 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1774                         int skip, int pc)
1775 {
1776         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1777                 return;
1778
1779         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1780 }
1781
1782 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1783                    int pc)
1784 {
1785         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1786 }
1787
1788 /**
1789  * trace_dump_stack - record a stack backtrace in the trace buffer
1790  * @skip: Number of functions to skip (helper handlers)
1791  */
1792 void trace_dump_stack(int skip)
1793 {
1794         unsigned long flags;
1795
1796         if (tracing_disabled || tracing_selftest_running)
1797                 return;
1798
1799         local_save_flags(flags);
1800
1801         /*
1802          * Skip 3 more frames; that seems to get us to
1803          * the caller of this function.
1804          */
1805         skip += 3;
1806         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1807                              flags, skip, preempt_count(), NULL);
1808 }
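/*
 * A minimal usage sketch (illustration only, compiled out): the call site
 * my_driver_check() is hypothetical. Passing skip == 0 records a backtrace
 * that starts at the caller of trace_dump_stack(), since the helper already
 * skips its own internal frames.
 */
#if 0
static void my_driver_check(int status)
{
        if (status < 0)
                trace_dump_stack(0);
}
#endif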
1809
1810 static DEFINE_PER_CPU(int, user_stack_count);
1811
1812 void
1813 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1814 {
1815         struct ftrace_event_call *call = &event_user_stack;
1816         struct ring_buffer_event *event;
1817         struct userstack_entry *entry;
1818         struct stack_trace trace;
1819
1820         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1821                 return;
1822
1823         /*
1824          * NMIs cannot handle page faults, even with fixups.
1825          * Saving the user stack can (and often does) fault.
1826          */
1827         if (unlikely(in_nmi()))
1828                 return;
1829
1830         /*
1831          * prevent recursion, since the user stack tracing may
1832          * trigger other kernel events.
1833          */
1834         preempt_disable();
1835         if (__this_cpu_read(user_stack_count))
1836                 goto out;
1837
1838         __this_cpu_inc(user_stack_count);
1839
1840         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1841                                           sizeof(*entry), flags, pc);
1842         if (!event)
1843                 goto out_drop_count;
1844         entry   = ring_buffer_event_data(event);
1845
1846         entry->tgid             = current->tgid;
1847         memset(&entry->caller, 0, sizeof(entry->caller));
1848
1849         trace.nr_entries        = 0;
1850         trace.max_entries       = FTRACE_STACK_ENTRIES;
1851         trace.skip              = 0;
1852         trace.entries           = entry->caller;
1853
1854         save_stack_trace_user(&trace);
1855         if (!call_filter_check_discard(call, entry, buffer, event))
1856                 __buffer_unlock_commit(buffer, event);
1857
1858  out_drop_count:
1859         __this_cpu_dec(user_stack_count);
1860  out:
1861         preempt_enable();
1862 }
1863
1864 #ifdef UNUSED
1865 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1866 {
1867         ftrace_trace_userstack(tr, flags, preempt_count());
1868 }
1869 #endif /* UNUSED */
1870
1871 #endif /* CONFIG_STACKTRACE */
1872
1873 /* created for use with alloc_percpu */
1874 struct trace_buffer_struct {
1875         char buffer[TRACE_BUF_SIZE];
1876 };
1877
1878 static struct trace_buffer_struct *trace_percpu_buffer;
1879 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1880 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1881 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1882
1883 /*
1884  * The buffer used depends on the context. There is a per-cpu
1885  * buffer for normal context, softirq context, hard irq context and
1886  * NMI context. This allows for lockless recording.
1887  *
1888  * Note: if the buffers failed to be allocated, this returns NULL.
1889  */
1890 static char *get_trace_buf(void)
1891 {
1892         struct trace_buffer_struct *percpu_buffer;
1893
1894         /*
1895          * If we have allocated per cpu buffers, then we do not
1896          * need to do any locking.
1897          */
1898         if (in_nmi())
1899                 percpu_buffer = trace_percpu_nmi_buffer;
1900         else if (in_irq())
1901                 percpu_buffer = trace_percpu_irq_buffer;
1902         else if (in_softirq())
1903                 percpu_buffer = trace_percpu_sirq_buffer;
1904         else
1905                 percpu_buffer = trace_percpu_buffer;
1906
1907         if (!percpu_buffer)
1908                 return NULL;
1909
1910         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1911 }
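/*
 * A minimal sketch (illustration only, compiled out) of the contract this
 * helper assumes: the returned buffer is per-cpu and per-context, so it is
 * only valid while preemption stays disabled. trace_vbprintk() below
 * follows the same pattern.
 */
#if 0
        char *buf;

        preempt_disable_notrace();
        buf = get_trace_buf();
        if (buf)
                snprintf(buf, TRACE_BUF_SIZE, "formatted with preemption off");
        preempt_enable_notrace();
#endif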
1912
1913 static int alloc_percpu_trace_buffer(void)
1914 {
1915         struct trace_buffer_struct *buffers;
1916         struct trace_buffer_struct *sirq_buffers;
1917         struct trace_buffer_struct *irq_buffers;
1918         struct trace_buffer_struct *nmi_buffers;
1919
1920         buffers = alloc_percpu(struct trace_buffer_struct);
1921         if (!buffers)
1922                 goto err_warn;
1923
1924         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1925         if (!sirq_buffers)
1926                 goto err_sirq;
1927
1928         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1929         if (!irq_buffers)
1930                 goto err_irq;
1931
1932         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1933         if (!nmi_buffers)
1934                 goto err_nmi;
1935
1936         trace_percpu_buffer = buffers;
1937         trace_percpu_sirq_buffer = sirq_buffers;
1938         trace_percpu_irq_buffer = irq_buffers;
1939         trace_percpu_nmi_buffer = nmi_buffers;
1940
1941         return 0;
1942
1943  err_nmi:
1944         free_percpu(irq_buffers);
1945  err_irq:
1946         free_percpu(sirq_buffers);
1947  err_sirq:
1948         free_percpu(buffers);
1949  err_warn:
1950         WARN(1, "Could not allocate percpu trace_printk buffer");
1951         return -ENOMEM;
1952 }
1953
1954 static int buffers_allocated;
1955
1956 void trace_printk_init_buffers(void)
1957 {
1958         if (buffers_allocated)
1959                 return;
1960
1961         if (alloc_percpu_trace_buffer())
1962                 return;
1963
1964         pr_info("ftrace: Allocated trace_printk buffers\n");
1965
1966         /* Expand the buffers to set size */
1967         tracing_update_buffers();
1968
1969         buffers_allocated = 1;
1970
1971         /*
1972          * trace_printk_init_buffers() can be called by modules.
1973          * If that happens, then we need to start cmdline recording
1974          * directly here. If global_trace.trace_buffer.buffer is already
1975          * allocated, then this was called by module code.
1976          */
1977         if (global_trace.trace_buffer.buffer)
1978                 tracing_start_cmdline_record();
1979 }
1980
1981 void trace_printk_start_comm(void)
1982 {
1983         /* Start tracing comms if trace printk is set */
1984         if (!buffers_allocated)
1985                 return;
1986         tracing_start_cmdline_record();
1987 }
1988
1989 static void trace_printk_start_stop_comm(int enabled)
1990 {
1991         if (!buffers_allocated)
1992                 return;
1993
1994         if (enabled)
1995                 tracing_start_cmdline_record();
1996         else
1997                 tracing_stop_cmdline_record();
1998 }
1999
2000 /**
2001  * trace_vbprintk - write a binary printk message into the tracing buffer
2002  *
2003  */
2004 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2005 {
2006         struct ftrace_event_call *call = &event_bprint;
2007         struct ring_buffer_event *event;
2008         struct ring_buffer *buffer;
2009         struct trace_array *tr = &global_trace;
2010         struct bprint_entry *entry;
2011         unsigned long flags;
2012         char *tbuffer;
2013         int len = 0, size, pc;
2014
2015         if (unlikely(tracing_selftest_running || tracing_disabled))
2016                 return 0;
2017
2018         /* Don't pollute graph traces with trace_vprintk internals */
2019         pause_graph_tracing();
2020
2021         pc = preempt_count();
2022         preempt_disable_notrace();
2023
2024         tbuffer = get_trace_buf();
2025         if (!tbuffer) {
2026                 len = 0;
2027                 goto out;
2028         }
2029
2030         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2031
2032         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2033                 goto out;
2034
2035         local_save_flags(flags);
2036         size = sizeof(*entry) + sizeof(u32) * len;
2037         buffer = tr->trace_buffer.buffer;
2038         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2039                                           flags, pc);
2040         if (!event)
2041                 goto out;
2042         entry = ring_buffer_event_data(event);
2043         entry->ip                       = ip;
2044         entry->fmt                      = fmt;
2045
2046         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2047         if (!call_filter_check_discard(call, entry, buffer, event)) {
2048                 __buffer_unlock_commit(buffer, event);
2049                 ftrace_trace_stack(buffer, flags, 6, pc);
2050         }
2051
2052 out:
2053         preempt_enable_notrace();
2054         unpause_graph_tracing();
2055
2056         return len;
2057 }
2058 EXPORT_SYMBOL_GPL(trace_vbprintk);
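/*
 * A minimal sketch (illustration only, compiled out): trace_vbprintk() is
 * normally reached through the trace_printk() macro, which, for constant
 * format strings, records only the format pointer plus the binary
 * arguments. A typical (hypothetical) call site:
 */
#if 0
        trace_printk("fast path hit: cpu=%d count=%lu\n", cpu, count);
#endif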
2059
2060 static int
2061 __trace_array_vprintk(struct ring_buffer *buffer,
2062                       unsigned long ip, const char *fmt, va_list args)
2063 {
2064         struct ftrace_event_call *call = &event_print;
2065         struct ring_buffer_event *event;
2066         int len = 0, size, pc;
2067         struct print_entry *entry;
2068         unsigned long flags;
2069         char *tbuffer;
2070
2071         if (tracing_disabled || tracing_selftest_running)
2072                 return 0;
2073
2074         /* Don't pollute graph traces with trace_vprintk internals */
2075         pause_graph_tracing();
2076
2077         pc = preempt_count();
2078         preempt_disable_notrace();
2079
2080
2081         tbuffer = get_trace_buf();
2082         if (!tbuffer) {
2083                 len = 0;
2084                 goto out;
2085         }
2086
2087         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2088         if (len > TRACE_BUF_SIZE)
2089                 goto out;
2090
2091         local_save_flags(flags);
2092         size = sizeof(*entry) + len + 1;
2093         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2094                                           flags, pc);
2095         if (!event)
2096                 goto out;
2097         entry = ring_buffer_event_data(event);
2098         entry->ip = ip;
2099
2100         memcpy(&entry->buf, tbuffer, len);
2101         entry->buf[len] = '\0';
2102         if (!call_filter_check_discard(call, entry, buffer, event)) {
2103                 __buffer_unlock_commit(buffer, event);
2104                 ftrace_trace_stack(buffer, flags, 6, pc);
2105         }
2106  out:
2107         preempt_enable_notrace();
2108         unpause_graph_tracing();
2109
2110         return len;
2111 }
2112
2113 int trace_array_vprintk(struct trace_array *tr,
2114                         unsigned long ip, const char *fmt, va_list args)
2115 {
2116         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2117 }
2118
2119 int trace_array_printk(struct trace_array *tr,
2120                        unsigned long ip, const char *fmt, ...)
2121 {
2122         int ret;
2123         va_list ap;
2124
2125         if (!(trace_flags & TRACE_ITER_PRINTK))
2126                 return 0;
2127
2128         va_start(ap, fmt);
2129         ret = trace_array_vprintk(tr, ip, fmt, ap);
2130         va_end(ap);
2131         return ret;
2132 }
2133
2134 int trace_array_printk_buf(struct ring_buffer *buffer,
2135                            unsigned long ip, const char *fmt, ...)
2136 {
2137         int ret;
2138         va_list ap;
2139
2140         if (!(trace_flags & TRACE_ITER_PRINTK))
2141                 return 0;
2142
2143         va_start(ap, fmt);
2144         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2145         va_end(ap);
2146         return ret;
2147 }
2148
2149 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2150 {
2151         return trace_array_vprintk(&global_trace, ip, fmt, args);
2152 }
2153 EXPORT_SYMBOL_GPL(trace_vprintk);
2154
2155 static void trace_iterator_increment(struct trace_iterator *iter)
2156 {
2157         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2158
2159         iter->idx++;
2160         if (buf_iter)
2161                 ring_buffer_read(buf_iter, NULL);
2162 }
2163
2164 static struct trace_entry *
2165 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2166                 unsigned long *lost_events)
2167 {
2168         struct ring_buffer_event *event;
2169         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2170
2171         if (buf_iter)
2172                 event = ring_buffer_iter_peek(buf_iter, ts);
2173         else
2174                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2175                                          lost_events);
2176
2177         if (event) {
2178                 iter->ent_size = ring_buffer_event_length(event);
2179                 return ring_buffer_event_data(event);
2180         }
2181         iter->ent_size = 0;
2182         return NULL;
2183 }
2184
2185 static struct trace_entry *
2186 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2187                   unsigned long *missing_events, u64 *ent_ts)
2188 {
2189         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2190         struct trace_entry *ent, *next = NULL;
2191         unsigned long lost_events = 0, next_lost = 0;
2192         int cpu_file = iter->cpu_file;
2193         u64 next_ts = 0, ts;
2194         int next_cpu = -1;
2195         int next_size = 0;
2196         int cpu;
2197
2198         /*
2199          * If we are in a per_cpu trace file, don't bother iterating over
2200          * all cpus; just peek directly at that cpu's buffer.
2201          */
2202         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2203                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2204                         return NULL;
2205                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2206                 if (ent_cpu)
2207                         *ent_cpu = cpu_file;
2208
2209                 return ent;
2210         }
2211
2212         for_each_tracing_cpu(cpu) {
2213
2214                 if (ring_buffer_empty_cpu(buffer, cpu))
2215                         continue;
2216
2217                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2218
2219                 /*
2220                  * Pick the entry with the smallest timestamp:
2221                  */
2222                 if (ent && (!next || ts < next_ts)) {
2223                         next = ent;
2224                         next_cpu = cpu;
2225                         next_ts = ts;
2226                         next_lost = lost_events;
2227                         next_size = iter->ent_size;
2228                 }
2229         }
2230
2231         iter->ent_size = next_size;
2232
2233         if (ent_cpu)
2234                 *ent_cpu = next_cpu;
2235
2236         if (ent_ts)
2237                 *ent_ts = next_ts;
2238
2239         if (missing_events)
2240                 *missing_events = next_lost;
2241
2242         return next;
2243 }
2244
2245 /* Find the next real entry, without updating the iterator itself */
2246 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2247                                           int *ent_cpu, u64 *ent_ts)
2248 {
2249         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2250 }
2251
2252 /* Find the next real entry, and increment the iterator to the next entry */
2253 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2254 {
2255         iter->ent = __find_next_entry(iter, &iter->cpu,
2256                                       &iter->lost_events, &iter->ts);
2257
2258         if (iter->ent)
2259                 trace_iterator_increment(iter);
2260
2261         return iter->ent ? iter : NULL;
2262 }
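/*
 * A minimal sketch (illustration only, compiled out) of how the iterator
 * is typically driven: keep advancing with trace_find_next_entry_inc() and
 * print each entry, which is roughly what the seq_file path below and
 * ftrace_dump() do.
 */
#if 0
        while (trace_find_next_entry_inc(iter))
                print_trace_line(iter);
#endif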
2263
2264 static void trace_consume(struct trace_iterator *iter)
2265 {
2266         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2267                             &iter->lost_events);
2268 }
2269
2270 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2271 {
2272         struct trace_iterator *iter = m->private;
2273         int i = (int)*pos;
2274         void *ent;
2275
2276         WARN_ON_ONCE(iter->leftover);
2277
2278         (*pos)++;
2279
2280         /* can't go backwards */
2281         if (iter->idx > i)
2282                 return NULL;
2283
2284         if (iter->idx < 0)
2285                 ent = trace_find_next_entry_inc(iter);
2286         else
2287                 ent = iter;
2288
2289         while (ent && iter->idx < i)
2290                 ent = trace_find_next_entry_inc(iter);
2291
2292         iter->pos = *pos;
2293
2294         return ent;
2295 }
2296
2297 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2298 {
2299         struct ring_buffer_event *event;
2300         struct ring_buffer_iter *buf_iter;
2301         unsigned long entries = 0;
2302         u64 ts;
2303
2304         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2305
2306         buf_iter = trace_buffer_iter(iter, cpu);
2307         if (!buf_iter)
2308                 return;
2309
2310         ring_buffer_iter_reset(buf_iter);
2311
2312         /*
2313          * With the max latency tracers, it is possible that a reset
2314          * never took place on a cpu. This shows up as timestamps
2315          * that fall before the start of the buffer.
2316          */
2317         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2318                 if (ts >= iter->trace_buffer->time_start)
2319                         break;
2320                 entries++;
2321                 ring_buffer_read(buf_iter, NULL);
2322         }
2323
2324         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2325 }
2326
2327 /*
2328  * The current tracer is copied to avoid taking a global lock
2329  * all around.
2330  */
2331 static void *s_start(struct seq_file *m, loff_t *pos)
2332 {
2333         struct trace_iterator *iter = m->private;
2334         struct trace_array *tr = iter->tr;
2335         int cpu_file = iter->cpu_file;
2336         void *p = NULL;
2337         loff_t l = 0;
2338         int cpu;
2339
2340         /*
2341          * Copy the tracer to avoid taking a global lock all around.
2342          * iter->trace is a copy of current_trace, so the name pointer
2343          * may be compared instead of calling strcmp(): iter->trace->name
2344          * will point to the same string as current_trace->name.
2345          */
2346         mutex_lock(&trace_types_lock);
2347         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2348                 *iter->trace = *tr->current_trace;
2349         mutex_unlock(&trace_types_lock);
2350
2351 #ifdef CONFIG_TRACER_MAX_TRACE
2352         if (iter->snapshot && iter->trace->use_max_tr)
2353                 return ERR_PTR(-EBUSY);
2354 #endif
2355
2356         if (!iter->snapshot)
2357                 atomic_inc(&trace_record_cmdline_disabled);
2358
2359         if (*pos != iter->pos) {
2360                 iter->ent = NULL;
2361                 iter->cpu = 0;
2362                 iter->idx = -1;
2363
2364                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2365                         for_each_tracing_cpu(cpu)
2366                                 tracing_iter_reset(iter, cpu);
2367                 } else
2368                         tracing_iter_reset(iter, cpu_file);
2369
2370                 iter->leftover = 0;
2371                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2372                         ;
2373
2374         } else {
2375                 /*
2376                  * If we overflowed the seq_file before, then we want
2377                  * to just reuse the trace_seq buffer.
2378                  */
2379                 if (iter->leftover)
2380                         p = iter;
2381                 else {
2382                         l = *pos - 1;
2383                         p = s_next(m, p, &l);
2384                 }
2385         }
2386
2387         trace_event_read_lock();
2388         trace_access_lock(cpu_file);
2389         return p;
2390 }
2391
2392 static void s_stop(struct seq_file *m, void *p)
2393 {
2394         struct trace_iterator *iter = m->private;
2395
2396 #ifdef CONFIG_TRACER_MAX_TRACE
2397         if (iter->snapshot && iter->trace->use_max_tr)
2398                 return;
2399 #endif
2400
2401         if (!iter->snapshot)
2402                 atomic_dec(&trace_record_cmdline_disabled);
2403
2404         trace_access_unlock(iter->cpu_file);
2405         trace_event_read_unlock();
2406 }
2407
2408 static void
2409 get_total_entries(struct trace_buffer *buf,
2410                   unsigned long *total, unsigned long *entries)
2411 {
2412         unsigned long count;
2413         int cpu;
2414
2415         *total = 0;
2416         *entries = 0;
2417
2418         for_each_tracing_cpu(cpu) {
2419                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2420                 /*
2421                  * If this buffer has skipped entries, then we hold all
2422                  * entries for the trace and we need to ignore the
2423                  * ones before the time stamp.
2424                  */
2425                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2426                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2427                         /* total is the same as the entries */
2428                         *total += count;
2429                 } else
2430                         *total += count +
2431                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2432                 *entries += count;
2433         }
2434 }
2435
2436 static void print_lat_help_header(struct seq_file *m)
2437 {
2438         seq_puts(m, "#                  _------=> CPU#            \n");
2439         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2440         seq_puts(m, "#                | / _----=> need-resched    \n");
2441         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2442         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2443         seq_puts(m, "#                |||| /     delay             \n");
2444         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2445         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2446 }
2447
2448 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2449 {
2450         unsigned long total;
2451         unsigned long entries;
2452
2453         get_total_entries(buf, &total, &entries);
2454         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2455                    entries, total, num_online_cpus());
2456         seq_puts(m, "#\n");
2457 }
2458
2459 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2460 {
2461         print_event_info(buf, m);
2462         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2463         seq_puts(m, "#              | |       |          |         |\n");
2464 }
2465
2466 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2467 {
2468         print_event_info(buf, m);
2469         seq_puts(m, "#                              _-----=> irqs-off\n");
2470         seq_puts(m, "#                             / _----=> need-resched\n");
2471         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2472         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2473         seq_puts(m, "#                            ||| /     delay\n");
2474         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2475         seq_puts(m, "#              | |       |   ||||       |         |\n");
2476 }
2477
2478 void
2479 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2480 {
2481         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2482         struct trace_buffer *buf = iter->trace_buffer;
2483         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2484         struct tracer *type = iter->trace;
2485         unsigned long entries;
2486         unsigned long total;
2487         const char *name;
2488
2489         name = type->name;
2490
2491         get_total_entries(buf, &total, &entries);
2492
2493         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2494                    name, UTS_RELEASE);
2495         seq_puts(m, "# -----------------------------------"
2496                  "---------------------------------\n");
2497         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2498                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2499                    nsecs_to_usecs(data->saved_latency),
2500                    entries,
2501                    total,
2502                    buf->cpu,
2503 #if defined(CONFIG_PREEMPT_NONE)
2504                    "server",
2505 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2506                    "desktop",
2507 #elif defined(CONFIG_PREEMPT)
2508                    "preempt",
2509 #else
2510                    "unknown",
2511 #endif
2512                    /* These are reserved for later use */
2513                    0, 0, 0, 0);
2514 #ifdef CONFIG_SMP
2515         seq_printf(m, " #P:%d)\n", num_online_cpus());
2516 #else
2517         seq_puts(m, ")\n");
2518 #endif
2519         seq_puts(m, "#    -----------------\n");
2520         seq_printf(m, "#    | task: %.16s-%d "
2521                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2522                    data->comm, data->pid,
2523                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2524                    data->policy, data->rt_priority);
2525         seq_puts(m, "#    -----------------\n");
2526
2527         if (data->critical_start) {
2528                 seq_puts(m, "#  => started at: ");
2529                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2530                 trace_print_seq(m, &iter->seq);
2531                 seq_puts(m, "\n#  => ended at:   ");
2532                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2533                 trace_print_seq(m, &iter->seq);
2534                 seq_puts(m, "\n#\n");
2535         }
2536
2537         seq_puts(m, "#\n");
2538 }
2539
2540 static void test_cpu_buff_start(struct trace_iterator *iter)
2541 {
2542         struct trace_seq *s = &iter->seq;
2543
2544         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2545                 return;
2546
2547         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2548                 return;
2549
2550         if (cpumask_test_cpu(iter->cpu, iter->started))
2551                 return;
2552
2553         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2554                 return;
2555
2556         cpumask_set_cpu(iter->cpu, iter->started);
2557
2558         /* Don't print started cpu buffer for the first entry of the trace */
2559         if (iter->idx > 1)
2560                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2561                                 iter->cpu);
2562 }
2563
2564 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2565 {
2566         struct trace_seq *s = &iter->seq;
2567         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2568         struct trace_entry *entry;
2569         struct trace_event *event;
2570
2571         entry = iter->ent;
2572
2573         test_cpu_buff_start(iter);
2574
2575         event = ftrace_find_event(entry->type);
2576
2577         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2578                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2579                         if (!trace_print_lat_context(iter))
2580                                 goto partial;
2581                 } else {
2582                         if (!trace_print_context(iter))
2583                                 goto partial;
2584                 }
2585         }
2586
2587         if (event)
2588                 return event->funcs->trace(iter, sym_flags, event);
2589
2590         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2591                 goto partial;
2592
2593         return TRACE_TYPE_HANDLED;
2594 partial:
2595         return TRACE_TYPE_PARTIAL_LINE;
2596 }
2597
2598 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2599 {
2600         struct trace_seq *s = &iter->seq;
2601         struct trace_entry *entry;
2602         struct trace_event *event;
2603
2604         entry = iter->ent;
2605
2606         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2607                 if (!trace_seq_printf(s, "%d %d %llu ",
2608                                       entry->pid, iter->cpu, iter->ts))
2609                         goto partial;
2610         }
2611
2612         event = ftrace_find_event(entry->type);
2613         if (event)
2614                 return event->funcs->raw(iter, 0, event);
2615
2616         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2617                 goto partial;
2618
2619         return TRACE_TYPE_HANDLED;
2620 partial:
2621         return TRACE_TYPE_PARTIAL_LINE;
2622 }
2623
2624 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2625 {
2626         struct trace_seq *s = &iter->seq;
2627         unsigned char newline = '\n';
2628         struct trace_entry *entry;
2629         struct trace_event *event;
2630
2631         entry = iter->ent;
2632
2633         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2634                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2635                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2636                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2637         }
2638
2639         event = ftrace_find_event(entry->type);
2640         if (event) {
2641                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2642                 if (ret != TRACE_TYPE_HANDLED)
2643                         return ret;
2644         }
2645
2646         SEQ_PUT_FIELD_RET(s, newline);
2647
2648         return TRACE_TYPE_HANDLED;
2649 }
2650
2651 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2652 {
2653         struct trace_seq *s = &iter->seq;
2654         struct trace_entry *entry;
2655         struct trace_event *event;
2656
2657         entry = iter->ent;
2658
2659         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2660                 SEQ_PUT_FIELD_RET(s, entry->pid);
2661                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2662                 SEQ_PUT_FIELD_RET(s, iter->ts);
2663         }
2664
2665         event = ftrace_find_event(entry->type);
2666         return event ? event->funcs->binary(iter, 0, event) :
2667                 TRACE_TYPE_HANDLED;
2668 }
2669
2670 int trace_empty(struct trace_iterator *iter)
2671 {
2672         struct ring_buffer_iter *buf_iter;
2673         int cpu;
2674
2675         /* If we are looking at one CPU buffer, only check that one */
2676         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2677                 cpu = iter->cpu_file;
2678                 buf_iter = trace_buffer_iter(iter, cpu);
2679                 if (buf_iter) {
2680                         if (!ring_buffer_iter_empty(buf_iter))
2681                                 return 0;
2682                 } else {
2683                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2684                                 return 0;
2685                 }
2686                 return 1;
2687         }
2688
2689         for_each_tracing_cpu(cpu) {
2690                 buf_iter = trace_buffer_iter(iter, cpu);
2691                 if (buf_iter) {
2692                         if (!ring_buffer_iter_empty(buf_iter))
2693                                 return 0;
2694                 } else {
2695                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2696                                 return 0;
2697                 }
2698         }
2699
2700         return 1;
2701 }
2702
2703 /*  Called with trace_event_read_lock() held. */
2704 enum print_line_t print_trace_line(struct trace_iterator *iter)
2705 {
2706         enum print_line_t ret;
2707
2708         if (iter->lost_events &&
2709             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2710                                  iter->cpu, iter->lost_events))
2711                 return TRACE_TYPE_PARTIAL_LINE;
2712
2713         if (iter->trace && iter->trace->print_line) {
2714                 ret = iter->trace->print_line(iter);
2715                 if (ret != TRACE_TYPE_UNHANDLED)
2716                         return ret;
2717         }
2718
2719         if (iter->ent->type == TRACE_BPUTS &&
2720                         trace_flags & TRACE_ITER_PRINTK &&
2721                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2722                 return trace_print_bputs_msg_only(iter);
2723
2724         if (iter->ent->type == TRACE_BPRINT &&
2725                         trace_flags & TRACE_ITER_PRINTK &&
2726                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2727                 return trace_print_bprintk_msg_only(iter);
2728
2729         if (iter->ent->type == TRACE_PRINT &&
2730                         trace_flags & TRACE_ITER_PRINTK &&
2731                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2732                 return trace_print_printk_msg_only(iter);
2733
2734         if (trace_flags & TRACE_ITER_BIN)
2735                 return print_bin_fmt(iter);
2736
2737         if (trace_flags & TRACE_ITER_HEX)
2738                 return print_hex_fmt(iter);
2739
2740         if (trace_flags & TRACE_ITER_RAW)
2741                 return print_raw_fmt(iter);
2742
2743         return print_trace_fmt(iter);
2744 }
2745
2746 void trace_latency_header(struct seq_file *m)
2747 {
2748         struct trace_iterator *iter = m->private;
2749
2750         /* print nothing if the buffers are empty */
2751         if (trace_empty(iter))
2752                 return;
2753
2754         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2755                 print_trace_header(m, iter);
2756
2757         if (!(trace_flags & TRACE_ITER_VERBOSE))
2758                 print_lat_help_header(m);
2759 }
2760
2761 void trace_default_header(struct seq_file *m)
2762 {
2763         struct trace_iterator *iter = m->private;
2764
2765         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2766                 return;
2767
2768         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2769                 /* print nothing if the buffers are empty */
2770                 if (trace_empty(iter))
2771                         return;
2772                 print_trace_header(m, iter);
2773                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2774                         print_lat_help_header(m);
2775         } else {
2776                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2777                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2778                                 print_func_help_header_irq(iter->trace_buffer, m);
2779                         else
2780                                 print_func_help_header(iter->trace_buffer, m);
2781                 }
2782         }
2783 }
2784
2785 static void test_ftrace_alive(struct seq_file *m)
2786 {
2787         if (!ftrace_is_dead())
2788                 return;
2789         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2790         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2791 }
2792
2793 #ifdef CONFIG_TRACER_MAX_TRACE
2794 static void show_snapshot_main_help(struct seq_file *m)
2795 {
2796         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2797         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2798         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2799         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2800         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2801         seq_printf(m, "#                       is not a '0' or '1')\n");
2802 }
2803
2804 static void show_snapshot_percpu_help(struct seq_file *m)
2805 {
2806         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2807 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2808         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2809         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2810 #else
2811         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2812         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2813 #endif
2814         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2815         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2816         seq_printf(m, "#                       is not a '0' or '1')\n");
2817 }
2818
2819 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2820 {
2821         if (iter->tr->allocated_snapshot)
2822                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2823         else
2824                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2825
2826         seq_printf(m, "# Snapshot commands:\n");
2827         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2828                 show_snapshot_main_help(m);
2829         else
2830                 show_snapshot_percpu_help(m);
2831 }
2832 #else
2833 /* Should never be called */
2834 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2835 #endif
2836
2837 static int s_show(struct seq_file *m, void *v)
2838 {
2839         struct trace_iterator *iter = v;
2840         int ret;
2841
2842         if (iter->ent == NULL) {
2843                 if (iter->tr) {
2844                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2845                         seq_puts(m, "#\n");
2846                         test_ftrace_alive(m);
2847                 }
2848                 if (iter->snapshot && trace_empty(iter))
2849                         print_snapshot_help(m, iter);
2850                 else if (iter->trace && iter->trace->print_header)
2851                         iter->trace->print_header(m);
2852                 else
2853                         trace_default_header(m);
2854
2855         } else if (iter->leftover) {
2856                 /*
2857                  * If we filled the seq_file buffer earlier, we
2858                  * want to just show it now.
2859                  */
2860                 ret = trace_print_seq(m, &iter->seq);
2861
2862                 /* ret should this time be zero, but you never know */
2863                 iter->leftover = ret;
2864
2865         } else {
2866                 print_trace_line(iter);
2867                 ret = trace_print_seq(m, &iter->seq);
2868                 /*
2869                  * If we overflow the seq_file buffer, then it will
2870                  * ask us for this data again at start up.
2871                  * Use that instead.
2872                  *  ret is 0 if seq_file write succeeded.
2873                  *        -1 otherwise.
2874                  */
2875                 iter->leftover = ret;
2876         }
2877
2878         return 0;
2879 }
2880
2881 /*
2882  * Should be used after trace_array_get(); trace_types_lock
2883  * ensures that i_cdev was already initialized.
2884  */
2885 static inline int tracing_get_cpu(struct inode *inode)
2886 {
2887         if (inode->i_cdev) /* See trace_create_cpu_file() */
2888                 return (long)inode->i_cdev - 1;
2889         return RING_BUFFER_ALL_CPUS;
2890 }
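/*
 * A minimal sketch (illustration only, compiled out) of the encoding this
 * helper relies on: when a per-cpu file is created, the cpu number is
 * stored in i_cdev offset by one, so that a NULL i_cdev keeps meaning
 * "no specific cpu" (RING_BUFFER_ALL_CPUS). Roughly:
 */
#if 0
        /* at file creation time (see trace_create_cpu_file()): */
        inode->i_cdev = (void *)(cpu + 1);
        /* at open/read time (this helper): */
        cpu = (long)inode->i_cdev - 1;
#endif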
2891
2892 static const struct seq_operations tracer_seq_ops = {
2893         .start          = s_start,
2894         .next           = s_next,
2895         .stop           = s_stop,
2896         .show           = s_show,
2897 };
2898
2899 static struct trace_iterator *
2900 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2901 {
2902         struct trace_array *tr = inode->i_private;
2903         struct trace_iterator *iter;
2904         int cpu;
2905
2906         if (tracing_disabled)
2907                 return ERR_PTR(-ENODEV);
2908
2909         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2910         if (!iter)
2911                 return ERR_PTR(-ENOMEM);
2912
2913         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2914                                     GFP_KERNEL);
2915         if (!iter->buffer_iter)
2916                 goto release;
2917
2918         /*
2919          * We make a copy of the current tracer to avoid concurrent
2920          * changes to it while we are reading.
2921          */
2922         mutex_lock(&trace_types_lock);
2923         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2924         if (!iter->trace)
2925                 goto fail;
2926
2927         *iter->trace = *tr->current_trace;
2928
2929         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2930                 goto fail;
2931
2932         iter->tr = tr;
2933
2934 #ifdef CONFIG_TRACER_MAX_TRACE
2935         /* Currently only the top directory has a snapshot */
2936         if (tr->current_trace->print_max || snapshot)
2937                 iter->trace_buffer = &tr->max_buffer;
2938         else
2939 #endif
2940                 iter->trace_buffer = &tr->trace_buffer;
2941         iter->snapshot = snapshot;
2942         iter->pos = -1;
2943         iter->cpu_file = tracing_get_cpu(inode);
2944         mutex_init(&iter->mutex);
2945
2946         /* Notify the tracer early; before we stop tracing. */
2947         if (iter->trace && iter->trace->open)
2948                 iter->trace->open(iter);
2949
2950         /* Annotate start of buffers if we had overruns */
2951         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2952                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2953
2954         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2955         if (trace_clocks[tr->clock_id].in_ns)
2956                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2957
2958         /* stop the trace while dumping if we are not opening "snapshot" */
2959         if (!iter->snapshot)
2960                 tracing_stop_tr(tr);
2961
2962         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2963                 for_each_tracing_cpu(cpu) {
2964                         iter->buffer_iter[cpu] =
2965                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2966                 }
2967                 ring_buffer_read_prepare_sync();
2968                 for_each_tracing_cpu(cpu) {
2969                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2970                         tracing_iter_reset(iter, cpu);
2971                 }
2972         } else {
2973                 cpu = iter->cpu_file;
2974                 iter->buffer_iter[cpu] =
2975                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2976                 ring_buffer_read_prepare_sync();
2977                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2978                 tracing_iter_reset(iter, cpu);
2979         }
2980
2981         mutex_unlock(&trace_types_lock);
2982
2983         return iter;
2984
2985  fail:
2986         mutex_unlock(&trace_types_lock);
2987         kfree(iter->trace);
2988         kfree(iter->buffer_iter);
2989 release:
2990         seq_release_private(inode, file);
2991         return ERR_PTR(-ENOMEM);
2992 }
2993
2994 int tracing_open_generic(struct inode *inode, struct file *filp)
2995 {
2996         if (tracing_disabled)
2997                 return -ENODEV;
2998
2999         filp->private_data = inode->i_private;
3000         return 0;
3001 }
3002
3003 bool tracing_is_disabled(void)
3004 {
3005         return (tracing_disabled) ? true : false;
3006 }
3007
3008 /*
3009  * Open and update trace_array ref count.
3010  * Must have the current trace_array passed to it.
3011  */
3012 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3013 {
3014         struct trace_array *tr = inode->i_private;
3015
3016         if (tracing_disabled)
3017                 return -ENODEV;
3018
3019         if (trace_array_get(tr) < 0)
3020                 return -ENODEV;
3021
3022         filp->private_data = inode->i_private;
3023
3024         return 0;
3025 }
3026
3027 static int tracing_release(struct inode *inode, struct file *file)
3028 {
3029         struct trace_array *tr = inode->i_private;
3030         struct seq_file *m = file->private_data;
3031         struct trace_iterator *iter;
3032         int cpu;
3033
3034         if (!(file->f_mode & FMODE_READ)) {
3035                 trace_array_put(tr);
3036                 return 0;
3037         }
3038
3039         /* Writes do not use seq_file */
3040         iter = m->private;
3041         mutex_lock(&trace_types_lock);
3042
3043         for_each_tracing_cpu(cpu) {
3044                 if (iter->buffer_iter[cpu])
3045                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3046         }
3047
3048         if (iter->trace && iter->trace->close)
3049                 iter->trace->close(iter);
3050
3051         if (!iter->snapshot)
3052                 /* reenable tracing if it was previously enabled */
3053                 tracing_start_tr(tr);
3054
3055         __trace_array_put(tr);
3056
3057         mutex_unlock(&trace_types_lock);
3058
3059         mutex_destroy(&iter->mutex);
3060         free_cpumask_var(iter->started);
3061         kfree(iter->trace);
3062         kfree(iter->buffer_iter);
3063         seq_release_private(inode, file);
3064
3065         return 0;
3066 }
3067
3068 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3069 {
3070         struct trace_array *tr = inode->i_private;
3071
3072         trace_array_put(tr);
3073         return 0;
3074 }
3075
3076 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3077 {
3078         struct trace_array *tr = inode->i_private;
3079
3080         trace_array_put(tr);
3081
3082         return single_release(inode, file);
3083 }
3084
3085 static int tracing_open(struct inode *inode, struct file *file)
3086 {
3087         struct trace_array *tr = inode->i_private;
3088         struct trace_iterator *iter;
3089         int ret = 0;
3090
3091         if (trace_array_get(tr) < 0)
3092                 return -ENODEV;
3093
3094         /* If this file was open for write, then erase contents */
3095         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3096                 int cpu = tracing_get_cpu(inode);
3097
3098                 if (cpu == RING_BUFFER_ALL_CPUS)
3099                         tracing_reset_online_cpus(&tr->trace_buffer);
3100                 else
3101                         tracing_reset(&tr->trace_buffer, cpu);
3102         }
3103
3104         if (file->f_mode & FMODE_READ) {
3105                 iter = __tracing_open(inode, file, false);
3106                 if (IS_ERR(iter))
3107                         ret = PTR_ERR(iter);
3108                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3109                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3110         }
3111
3112         if (ret < 0)
3113                 trace_array_put(tr);
3114
3115         return ret;
3116 }
3117
3118 static void *
3119 t_next(struct seq_file *m, void *v, loff_t *pos)
3120 {
3121         struct tracer *t = v;
3122
3123         (*pos)++;
3124
3125         if (t)
3126                 t = t->next;
3127
3128         return t;
3129 }
3130
3131 static void *t_start(struct seq_file *m, loff_t *pos)
3132 {
3133         struct tracer *t;
3134         loff_t l = 0;
3135
3136         mutex_lock(&trace_types_lock);
3137         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3138                 ;
3139
3140         return t;
3141 }
3142
3143 static void t_stop(struct seq_file *m, void *p)
3144 {
3145         mutex_unlock(&trace_types_lock);
3146 }
3147
3148 static int t_show(struct seq_file *m, void *v)
3149 {
3150         struct tracer *t = v;
3151
3152         if (!t)
3153                 return 0;
3154
3155         seq_printf(m, "%s", t->name);
3156         if (t->next)
3157                 seq_putc(m, ' ');
3158         else
3159                 seq_putc(m, '\n');
3160
3161         return 0;
3162 }
3163
3164 static const struct seq_operations show_traces_seq_ops = {
3165         .start          = t_start,
3166         .next           = t_next,
3167         .stop           = t_stop,
3168         .show           = t_show,
3169 };
3170
3171 static int show_traces_open(struct inode *inode, struct file *file)
3172 {
3173         if (tracing_disabled)
3174                 return -ENODEV;
3175
3176         return seq_open(file, &show_traces_seq_ops);
3177 }
3178
3179 static ssize_t
3180 tracing_write_stub(struct file *filp, const char __user *ubuf,
3181                    size_t count, loff_t *ppos)
3182 {
3183         return count;
3184 }
3185
3186 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3187 {
3188         int ret;
3189
3190         if (file->f_mode & FMODE_READ)
3191                 ret = seq_lseek(file, offset, whence);
3192         else
3193                 file->f_pos = ret = 0;
3194
3195         return ret;
3196 }
3197
3198 static const struct file_operations tracing_fops = {
3199         .open           = tracing_open,
3200         .read           = seq_read,
3201         .write          = tracing_write_stub,
3202         .llseek         = tracing_lseek,
3203         .release        = tracing_release,
3204 };
3205
3206 static const struct file_operations show_traces_fops = {
3207         .open           = show_traces_open,
3208         .read           = seq_read,
3209         .release        = seq_release,
3210         .llseek         = seq_lseek,
3211 };
3212
3213 /*
3214  * The tracer itself will not take this lock, but we still want
3215  * to provide a consistent cpumask to user-space:
3216  */
3217 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3218
3219 /*
3220  * Temporary storage for the character representation of the
3221  * CPU bitmask (and one more byte for the newline):
3222  */
3223 static char mask_str[NR_CPUS + 1];
3224
3225 static ssize_t
3226 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3227                      size_t count, loff_t *ppos)
3228 {
3229         struct trace_array *tr = file_inode(filp)->i_private;
3230         int len;
3231
3232         mutex_lock(&tracing_cpumask_update_lock);
3233
3234         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3235         if (count - len < 2) {
3236                 count = -EINVAL;
3237                 goto out_err;
3238         }
3239         len += sprintf(mask_str + len, "\n");
3240         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3241
3242 out_err:
3243         mutex_unlock(&tracing_cpumask_update_lock);
3244
3245         return count;
3246 }
3247
3248 static ssize_t
3249 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3250                       size_t count, loff_t *ppos)
3251 {
3252         struct trace_array *tr = file_inode(filp)->i_private;
3253         cpumask_var_t tracing_cpumask_new;
3254         int err, cpu;
3255
3256         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3257                 return -ENOMEM;
3258
3259         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3260         if (err)
3261                 goto err_unlock;
3262
3263         mutex_lock(&tracing_cpumask_update_lock);
3264
3265         local_irq_disable();
3266         arch_spin_lock(&ftrace_max_lock);
3267         for_each_tracing_cpu(cpu) {
3268                 /*
3269                  * Increase/decrease the disabled counter if we are
3270                  * about to flip a bit in the cpumask:
3271                  */
3272                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3273                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3274                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3275                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3276                 }
3277                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3278                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3279                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3280                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3281                 }
3282         }
3283         arch_spin_unlock(&ftrace_max_lock);
3284         local_irq_enable();
3285
3286         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3287
3288         mutex_unlock(&tracing_cpumask_update_lock);
3289         free_cpumask_var(tracing_cpumask_new);
3290
3291         return count;
3292
3293 err_unlock:
3294         free_cpumask_var(tracing_cpumask_new);
3295
3296         return err;
3297 }
3298
3299 static const struct file_operations tracing_cpumask_fops = {
3300         .open           = tracing_open_generic_tr,
3301         .read           = tracing_cpumask_read,
3302         .write          = tracing_cpumask_write,
3303         .release        = tracing_release_generic_tr,
3304         .llseek         = generic_file_llseek,
3305 };
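
/*
 * Illustrative userspace sketch (not part of this file): restricting tracing
 * to CPUs 0 and 1 by writing a hex mask to the tracing_cpumask file that the
 * fops above back. The debugfs mount point /sys/kernel/debug is an
 * assumption; cpumask_parse_user() interprets the value as a hex CPU mask.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int limit_tracing_cpus(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/tracing_cpumask",
 *			      O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "3\n", 2);	// hex mask: bits 0 and 1 => CPUs 0-1
 *		close(fd);
 *		return 0;
 *	}
 */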
3306
3307 static int tracing_trace_options_show(struct seq_file *m, void *v)
3308 {
3309         struct tracer_opt *trace_opts;
3310         struct trace_array *tr = m->private;
3311         u32 tracer_flags;
3312         int i;
3313
3314         mutex_lock(&trace_types_lock);
3315         tracer_flags = tr->current_trace->flags->val;
3316         trace_opts = tr->current_trace->flags->opts;
3317
3318         for (i = 0; trace_options[i]; i++) {
3319                 if (trace_flags & (1 << i))
3320                         seq_printf(m, "%s\n", trace_options[i]);
3321                 else
3322                         seq_printf(m, "no%s\n", trace_options[i]);
3323         }
3324
3325         for (i = 0; trace_opts[i].name; i++) {
3326                 if (tracer_flags & trace_opts[i].bit)
3327                         seq_printf(m, "%s\n", trace_opts[i].name);
3328                 else
3329                         seq_printf(m, "no%s\n", trace_opts[i].name);
3330         }
3331         mutex_unlock(&trace_types_lock);
3332
3333         return 0;
3334 }
3335
3336 static int __set_tracer_option(struct tracer *trace,
3337                                struct tracer_flags *tracer_flags,
3338                                struct tracer_opt *opts, int neg)
3339 {
3340         int ret;
3341
3342         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3343         if (ret)
3344                 return ret;
3345
3346         if (neg)
3347                 tracer_flags->val &= ~opts->bit;
3348         else
3349                 tracer_flags->val |= opts->bit;
3350         return 0;
3351 }
3352
3353 /* Try to assign a tracer specific option */
3354 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3355 {
3356         struct tracer_flags *tracer_flags = trace->flags;
3357         struct tracer_opt *opts = NULL;
3358         int i;
3359
3360         for (i = 0; tracer_flags->opts[i].name; i++) {
3361                 opts = &tracer_flags->opts[i];
3362
3363                 if (strcmp(cmp, opts->name) == 0)
3364                         return __set_tracer_option(trace, trace->flags,
3365                                                    opts, neg);
3366         }
3367
3368         return -EINVAL;
3369 }
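
/*
 * A minimal sketch (hypothetical foo_* names, not a tracer in this tree) of
 * the per-tracer option data that __set_tracer_option()/set_tracer_option()
 * operate on: each tracer_opt maps an option name to a bit, tracer_flags
 * holds the current bit mask, and the tracer's set_flag() callback may veto
 * a change by returning non-zero.
 *
 *	#define FOO_OPT_VERBOSE	0x1
 *
 *	static struct tracer_opt foo_opts[] = {
 *		{ TRACER_OPT(verbose, FOO_OPT_VERBOSE) },
 *		{ }	// terminator; the name == NULL check above relies on it
 *	};
 *
 *	static struct tracer_flags foo_flags = {
 *		.val	= 0,
 *		.opts	= foo_opts,
 *	};
 *
 *	static int foo_set_flag(u32 old_flags, u32 bit, int set)
 *	{
 *		return 0;	// accept every change
 *	}
 */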
3370
3371 /* Some tracers require overwrite to stay enabled */
3372 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3373 {
3374         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3375                 return -1;
3376
3377         return 0;
3378 }
3379
3380 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3381 {
3382         /* do nothing if flag is already set */
3383         if (!!(trace_flags & mask) == !!enabled)
3384                 return 0;
3385
3386         /* Give the tracer a chance to approve the change */
3387         if (tr->current_trace->flag_changed)
3388                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3389                         return -EINVAL;
3390
3391         if (enabled)
3392                 trace_flags |= mask;
3393         else
3394                 trace_flags &= ~mask;
3395
3396         if (mask == TRACE_ITER_RECORD_CMD)
3397                 trace_event_enable_cmd_record(enabled);
3398
3399         if (mask == TRACE_ITER_OVERWRITE) {
3400                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3401 #ifdef CONFIG_TRACER_MAX_TRACE
3402                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3403 #endif
3404         }
3405
3406         if (mask == TRACE_ITER_PRINTK)
3407                 trace_printk_start_stop_comm(enabled);
3408
3409         return 0;
3410 }
3411
3412 static int trace_set_options(struct trace_array *tr, char *option)
3413 {
3414         char *cmp;
3415         int neg = 0;
3416         int ret = -ENODEV;
3417         int i;
3418
3419         cmp = strstrip(option);
3420
3421         if (strncmp(cmp, "no", 2) == 0) {
3422                 neg = 1;
3423                 cmp += 2;
3424         }
3425
3426         mutex_lock(&trace_types_lock);
3427
3428         for (i = 0; trace_options[i]; i++) {
3429                 if (strcmp(cmp, trace_options[i]) == 0) {
3430                         ret = set_tracer_flag(tr, 1 << i, !neg);
3431                         break;
3432                 }
3433         }
3434
3435         /* If no option could be set, test the specific tracer options */
3436         if (!trace_options[i])
3437                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3438
3439         mutex_unlock(&trace_types_lock);
3440
3441         return ret;
3442 }
3443
3444 static ssize_t
3445 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3446                         size_t cnt, loff_t *ppos)
3447 {
3448         struct seq_file *m = filp->private_data;
3449         struct trace_array *tr = m->private;
3450         char buf[64];
3451         int ret;
3452
3453         if (cnt >= sizeof(buf))
3454                 return -EINVAL;
3455
3456         if (copy_from_user(&buf, ubuf, cnt))
3457                 return -EFAULT;
3458
3459         buf[cnt] = 0;
3460
3461         ret = trace_set_options(tr, buf);
3462         if (ret < 0)
3463                 return ret;
3464
3465         *ppos += cnt;
3466
3467         return cnt;
3468 }
3469
3470 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3471 {
3472         struct trace_array *tr = inode->i_private;
3473         int ret;
3474
3475         if (tracing_disabled)
3476                 return -ENODEV;
3477
3478         if (trace_array_get(tr) < 0)
3479                 return -ENODEV;
3480
3481         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3482         if (ret < 0)
3483                 trace_array_put(tr);
3484
3485         return ret;
3486 }
3487
3488 static const struct file_operations tracing_iter_fops = {
3489         .open           = tracing_trace_options_open,
3490         .read           = seq_read,
3491         .llseek         = seq_lseek,
3492         .release        = tracing_single_release_tr,
3493         .write          = tracing_trace_options_write,
3494 };
3495
3496 static const char readme_msg[] =
3497         "tracing mini-HOWTO:\n\n"
3498         "# echo 0 > tracing_on : quick way to disable tracing\n"
3499         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3500         " Important files:\n"
3501         "  trace\t\t\t- The static contents of the buffer\n"
3502         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3503         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3504         "  current_tracer\t- function and latency tracers\n"
3505         "  available_tracers\t- list of configured tracers for current_tracer\n"
3506         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3507         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3508         "  trace_clock\t\t- change the clock used to order events\n"
3509         "       local:   Per cpu clock but may not be synced across CPUs\n"
3510         "      global:   Synced across CPUs but slows tracing down.\n"
3511         "     counter:   Not a clock, but just an increment\n"
3512         "      uptime:   Jiffy counter from time of boot\n"
3513         "        perf:   Same clock that perf events use\n"
3514 #ifdef CONFIG_X86_64
3515         "     x86-tsc:   TSC cycle counter\n"
3516 #endif
3517         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3518         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3519         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3520         "\t\t\t  Remove sub-buffer with rmdir\n"
3521         "  trace_options\t\t- Set format or modify how tracing happens\n"
3522         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
3523         "\t\t\t  option name\n"
3524 #ifdef CONFIG_DYNAMIC_FTRACE
3525         "\n  available_filter_functions - list of functions that can be filtered on\n"
3526         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3527         "\t\t\t  functions\n"
3528         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3529         "\t     modules: Can select a group via module\n"
3530         "\t      Format: :mod:<module-name>\n"
3531         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3532         "\t    triggers: a command to perform when function is hit\n"
3533         "\t      Format: <function>:<trigger>[:count]\n"
3534         "\t     trigger: traceon, traceoff\n"
3535         "\t\t      enable_event:<system>:<event>\n"
3536         "\t\t      disable_event:<system>:<event>\n"
3537 #ifdef CONFIG_STACKTRACE
3538         "\t\t      stacktrace\n"
3539 #endif
3540 #ifdef CONFIG_TRACER_SNAPSHOT
3541         "\t\t      snapshot\n"
3542 #endif
3543         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3544         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3545         "\t     The first one will disable tracing every time do_fault is hit\n"
3546         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3547         "\t       The first time do_trap is hit and it disables tracing, the\n"
3548         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3549         "\t       the counter will not decrement. It only decrements when the\n"
3550         "\t       trigger did work\n"
3551         "\t     To remove trigger without count:\n"
3552         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3553         "\t     To remove trigger with a count:\n"
3554         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3555         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3556         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3557         "\t    modules: Can select a group via module command :mod:\n"
3558         "\t    Does not accept triggers\n"
3559 #endif /* CONFIG_DYNAMIC_FTRACE */
3560 #ifdef CONFIG_FUNCTION_TRACER
3561         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3562         "\t\t    (function)\n"
3563 #endif
3564 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3565         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3566         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3567 #endif
3568 #ifdef CONFIG_TRACER_SNAPSHOT
3569         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3570         "\t\t\t  snapshot buffer. Read the contents for more\n"
3571         "\t\t\t  information\n"
3572 #endif
3573 #ifdef CONFIG_STACK_TRACER
3574         "  stack_trace\t\t- Shows the max stack trace when active\n"
3575         "  stack_max_size\t- Shows current max stack size that was traced\n"
3576         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3577         "\t\t\t  new trace)\n"
3578 #ifdef CONFIG_DYNAMIC_FTRACE
3579         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3580         "\t\t\t  traces\n"
3581 #endif
3582 #endif /* CONFIG_STACK_TRACER */
3583         "  events/\t\t- Directory containing all trace event subsystems:\n"
3584         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3585         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3586         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3587         "\t\t\t  events\n"
3588         "      filter\t\t- If set, only events passing filter are traced\n"
3589         "  events/<system>/<event>/\t- Directory containing control files for\n"
3590         "\t\t\t  <event>:\n"
3591         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3592         "      filter\t\t- If set, only events passing filter are traced\n"
3593         "      trigger\t\t- If set, a command to perform when event is hit\n"
3594         "\t    Format: <trigger>[:count][if <filter>]\n"
3595         "\t   trigger: traceon, traceoff\n"
3596         "\t            enable_event:<system>:<event>\n"
3597         "\t            disable_event:<system>:<event>\n"
3598 #ifdef CONFIG_STACKTRACE
3599         "\t\t    stacktrace\n"
3600 #endif
3601 #ifdef CONFIG_TRACER_SNAPSHOT
3602         "\t\t    snapshot\n"
3603 #endif
3604         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3605         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3606         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3607         "\t                  events/block/block_unplug/trigger\n"
3608         "\t   The first disables tracing every time block_unplug is hit.\n"
3609         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3610         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3611         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
3612         "\t   Like function triggers, the counter is only decremented if it\n"
3613         "\t    enabled or disabled tracing.\n"
3614         "\t   To remove a trigger without a count:\n"
3615         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3616         "\t   To remove a trigger with a count:\n"
3617         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3618         "\t   Filters can be ignored when removing a trigger.\n"
3619 ;
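
/*
 * Illustrative userspace sketch of the basic workflow the README above
 * describes (not part of this file). The debugfs mount point and the
 * "function" tracer being configured in are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void write_str(const char *file, const char *val)
 *	{
 *		int fd = open(file, O_WRONLY);
 *		if (fd >= 0) {
 *			write(fd, val, strlen(val));
 *			close(fd);
 *		}
 *	}
 *
 *	int main(void)
 *	{
 *		write_str("/sys/kernel/debug/tracing/current_tracer", "function");
 *		write_str("/sys/kernel/debug/tracing/tracing_on", "1");
 *		// run the workload, then read the "trace" file
 *		return 0;
 *	}
 */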
3620
3621 static ssize_t
3622 tracing_readme_read(struct file *filp, char __user *ubuf,
3623                        size_t cnt, loff_t *ppos)
3624 {
3625         return simple_read_from_buffer(ubuf, cnt, ppos,
3626                                         readme_msg, strlen(readme_msg));
3627 }
3628
3629 static const struct file_operations tracing_readme_fops = {
3630         .open           = tracing_open_generic,
3631         .read           = tracing_readme_read,
3632         .llseek         = generic_file_llseek,
3633 };
3634
3635 static ssize_t
3636 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3637                                 size_t cnt, loff_t *ppos)
3638 {
3639         char *buf_comm;
3640         char *file_buf;
3641         char *buf;
3642         int len = 0;
3643         int pid;
3644         int i;
3645
3646         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3647         if (!file_buf)
3648                 return -ENOMEM;
3649
3650         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3651         if (!buf_comm) {
3652                 kfree(file_buf);
3653                 return -ENOMEM;
3654         }
3655
3656         buf = file_buf;
3657
3658         for (i = 0; i < SAVED_CMDLINES; i++) {
3659                 int r;
3660
3661                 pid = map_cmdline_to_pid[i];
3662                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3663                         continue;
3664
3665                 trace_find_cmdline(pid, buf_comm);
3666                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3667                 buf += r;
3668                 len += r;
3669         }
3670
3671         len = simple_read_from_buffer(ubuf, cnt, ppos,
3672                                       file_buf, len);
3673
3674         kfree(file_buf);
3675         kfree(buf_comm);
3676
3677         return len;
3678 }
3679
3680 static const struct file_operations tracing_saved_cmdlines_fops = {
3681     .open       = tracing_open_generic,
3682     .read       = tracing_saved_cmdlines_read,
3683     .llseek     = generic_file_llseek,
3684 };
3685
3686 static ssize_t
3687 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3688                        size_t cnt, loff_t *ppos)
3689 {
3690         struct trace_array *tr = filp->private_data;
3691         char buf[MAX_TRACER_SIZE+2];
3692         int r;
3693
3694         mutex_lock(&trace_types_lock);
3695         r = sprintf(buf, "%s\n", tr->current_trace->name);
3696         mutex_unlock(&trace_types_lock);
3697
3698         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3699 }
3700
3701 int tracer_init(struct tracer *t, struct trace_array *tr)
3702 {
3703         tracing_reset_online_cpus(&tr->trace_buffer);
3704         return t->init(tr);
3705 }
3706
3707 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3708 {
3709         int cpu;
3710
3711         for_each_tracing_cpu(cpu)
3712                 per_cpu_ptr(buf->data, cpu)->entries = val;
3713 }
3714
3715 #ifdef CONFIG_TRACER_MAX_TRACE
3716 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3717 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3718                                         struct trace_buffer *size_buf, int cpu_id)
3719 {
3720         int cpu, ret = 0;
3721
3722         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3723                 for_each_tracing_cpu(cpu) {
3724                         ret = ring_buffer_resize(trace_buf->buffer,
3725                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3726                         if (ret < 0)
3727                                 break;
3728                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3729                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3730                 }
3731         } else {
3732                 ret = ring_buffer_resize(trace_buf->buffer,
3733                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3734                 if (ret == 0)
3735                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3736                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3737         }
3738
3739         return ret;
3740 }
3741 #endif /* CONFIG_TRACER_MAX_TRACE */
3742
3743 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3744                                         unsigned long size, int cpu)
3745 {
3746         int ret;
3747
3748         /*
3749          * If kernel or user changes the size of the ring buffer
3750          * we use the size that was given, and we can forget about
3751          * expanding it later.
3752          */
3753         ring_buffer_expanded = true;
3754
3755         /* May be called before buffers are initialized */
3756         if (!tr->trace_buffer.buffer)
3757                 return 0;
3758
3759         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3760         if (ret < 0)
3761                 return ret;
3762
3763 #ifdef CONFIG_TRACER_MAX_TRACE
3764         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3765             !tr->current_trace->use_max_tr)
3766                 goto out;
3767
3768         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3769         if (ret < 0) {
3770                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3771                                                      &tr->trace_buffer, cpu);
3772                 if (r < 0) {
3773                         /*
3774                          * AARGH! We are left with different
3775                          * size max buffer!!!!
3776                          * The max buffer is our "snapshot" buffer.
3777                          * When a tracer needs a snapshot (one of the
3778                          * latency tracers), it swaps the max buffer
3779                          * with the saved snapshot. We succeeded in updating
3780                          * the size of the main buffer, but failed to update
3781                          * the size of the max buffer. Then, when we tried to
3782                          * reset the main buffer to its original size, that
3783                          * failed too. This is very unlikely to
3784                          * happen, but if it does, warn and kill all
3785                          * tracing.
3786                          */
3787                         WARN_ON(1);
3788                         tracing_disabled = 1;
3789                 }
3790                 return ret;
3791         }
3792
3793         if (cpu == RING_BUFFER_ALL_CPUS)
3794                 set_buffer_entries(&tr->max_buffer, size);
3795         else
3796                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3797
3798  out:
3799 #endif /* CONFIG_TRACER_MAX_TRACE */
3800
3801         if (cpu == RING_BUFFER_ALL_CPUS)
3802                 set_buffer_entries(&tr->trace_buffer, size);
3803         else
3804                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3805
3806         return ret;
3807 }
3808
3809 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3810                                           unsigned long size, int cpu_id)
3811 {
3812         int ret = size;
3813
3814         mutex_lock(&trace_types_lock);
3815
3816         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3817                 /* make sure this cpu is enabled in the mask */
3818                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3819                         ret = -EINVAL;
3820                         goto out;
3821                 }
3822         }
3823
3824         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3825         if (ret < 0)
3826                 ret = -ENOMEM;
3827
3828 out:
3829         mutex_unlock(&trace_types_lock);
3830
3831         return ret;
3832 }
3833
3834
3835 /**
3836  * tracing_update_buffers - used by tracing facility to expand ring buffers
3837  *
3838  * To save memory when tracing is configured in but never used, the
3839  * ring buffers are set to a minimum size. But once a user starts to
3840  * use the tracing facility, they need to grow to their default
3841  * size.
3842  *
3843  * This function is to be called when a tracer is about to be used.
3844  */
3845 int tracing_update_buffers(void)
3846 {
3847         int ret = 0;
3848
3849         mutex_lock(&trace_types_lock);
3850         if (!ring_buffer_expanded)
3851                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3852                                                 RING_BUFFER_ALL_CPUS);
3853         mutex_unlock(&trace_types_lock);
3854
3855         return ret;
3856 }
3857
3858 struct trace_option_dentry;
3859
3860 static struct trace_option_dentry *
3861 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3862
3863 static void
3864 destroy_trace_option_files(struct trace_option_dentry *topts);
3865
3866 static int tracing_set_tracer(const char *buf)
3867 {
3868         static struct trace_option_dentry *topts;
3869         struct trace_array *tr = &global_trace;
3870         struct tracer *t;
3871 #ifdef CONFIG_TRACER_MAX_TRACE
3872         bool had_max_tr;
3873 #endif
3874         int ret = 0;
3875
3876         mutex_lock(&trace_types_lock);
3877
3878         if (!ring_buffer_expanded) {
3879                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3880                                                 RING_BUFFER_ALL_CPUS);
3881                 if (ret < 0)
3882                         goto out;
3883                 ret = 0;
3884         }
3885
3886         for (t = trace_types; t; t = t->next) {
3887                 if (strcmp(t->name, buf) == 0)
3888                         break;
3889         }
3890         if (!t) {
3891                 ret = -EINVAL;
3892                 goto out;
3893         }
3894         if (t == tr->current_trace)
3895                 goto out;
3896
3897         trace_branch_disable();
3898
3899         tr->current_trace->enabled = false;
3900
3901         if (tr->current_trace->reset)
3902                 tr->current_trace->reset(tr);
3903
3904         /* Current trace needs to be nop_trace before synchronize_sched */
3905         tr->current_trace = &nop_trace;
3906
3907 #ifdef CONFIG_TRACER_MAX_TRACE
3908         had_max_tr = tr->allocated_snapshot;
3909
3910         if (had_max_tr && !t->use_max_tr) {
3911                 /*
3912                  * We need to make sure that the update_max_tr sees that
3913                  * current_trace changed to nop_trace to keep it from
3914                  * swapping the buffers after we resize it.
3915                  * The update_max_tr is called with interrupts disabled,
3916                  * so a synchronize_sched() is sufficient.
3917                  */
3918                 synchronize_sched();
3919                 free_snapshot(tr);
3920         }
3921 #endif
3922         destroy_trace_option_files(topts);
3923
3924         topts = create_trace_option_files(tr, t);
3925
3926 #ifdef CONFIG_TRACER_MAX_TRACE
3927         if (t->use_max_tr && !had_max_tr) {
3928                 ret = alloc_snapshot(tr);
3929                 if (ret < 0)
3930                         goto out;
3931         }
3932 #endif
3933
3934         if (t->init) {
3935                 ret = tracer_init(t, tr);
3936                 if (ret)
3937                         goto out;
3938         }
3939
3940         tr->current_trace = t;
3941         tr->current_trace->enabled = true;
3942         trace_branch_enable(tr);
3943  out:
3944         mutex_unlock(&trace_types_lock);
3945
3946         return ret;
3947 }
3948
3949 static ssize_t
3950 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3951                         size_t cnt, loff_t *ppos)
3952 {
3953         char buf[MAX_TRACER_SIZE+1];
3954         int i;
3955         size_t ret;
3956         int err;
3957
3958         ret = cnt;
3959
3960         if (cnt > MAX_TRACER_SIZE)
3961                 cnt = MAX_TRACER_SIZE;
3962
3963         if (copy_from_user(&buf, ubuf, cnt))
3964                 return -EFAULT;
3965
3966         buf[cnt] = 0;
3967
3968         /* strip ending whitespace. */
3969         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3970                 buf[i] = 0;
3971
3972         err = tracing_set_tracer(buf);
3973         if (err)
3974                 return err;
3975
3976         *ppos += ret;
3977
3978         return ret;
3979 }
3980
3981 static ssize_t
3982 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3983                      size_t cnt, loff_t *ppos)
3984 {
3985         unsigned long *ptr = filp->private_data;
3986         char buf[64];
3987         int r;
3988
3989         r = snprintf(buf, sizeof(buf), "%ld\n",
3990                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3991         if (r > sizeof(buf))
3992                 r = sizeof(buf);
3993         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3994 }
3995
3996 static ssize_t
3997 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3998                       size_t cnt, loff_t *ppos)
3999 {
4000         unsigned long *ptr = filp->private_data;
4001         unsigned long val;
4002         int ret;
4003
4004         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4005         if (ret)
4006                 return ret;
4007
4008         *ptr = val * 1000;
4009
4010         return cnt;
4011 }
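
/*
 * Unit note for the read/write pair above: the value written is taken as
 * microseconds and stored internally in nanoseconds (val * 1000), while the
 * read side converts back with nsecs_to_usecs(). As a worked example,
 * writing "500" to the file this handler typically backs (tracing_max_latency
 * in the stock tree) stores 500000, and reading the file prints 500 again.
 */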
4012
4013 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4014 {
4015         struct trace_array *tr = inode->i_private;
4016         struct trace_iterator *iter;
4017         int ret = 0;
4018
4019         if (tracing_disabled)
4020                 return -ENODEV;
4021
4022         if (trace_array_get(tr) < 0)
4023                 return -ENODEV;
4024
4025         mutex_lock(&trace_types_lock);
4026
4027         /* create a buffer to store the information to pass to userspace */
4028         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4029         if (!iter) {
4030                 ret = -ENOMEM;
4031                 __trace_array_put(tr);
4032                 goto out;
4033         }
4034
4035         /*
4036          * We make a copy of the current tracer to avoid concurrent
4037          * changes on it while we are reading.
4038          */
4039         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4040         if (!iter->trace) {
4041                 ret = -ENOMEM;
4042                 goto fail;
4043         }
4044         *iter->trace = *tr->current_trace;
4045
4046         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4047                 ret = -ENOMEM;
4048                 goto fail;
4049         }
4050
4051         /* trace pipe does not show start of buffer */
4052         cpumask_setall(iter->started);
4053
4054         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4055                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4056
4057         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4058         if (trace_clocks[tr->clock_id].in_ns)
4059                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4060
4061         iter->tr = tr;
4062         iter->trace_buffer = &tr->trace_buffer;
4063         iter->cpu_file = tracing_get_cpu(inode);
4064         mutex_init(&iter->mutex);
4065         filp->private_data = iter;
4066
4067         if (iter->trace->pipe_open)
4068                 iter->trace->pipe_open(iter);
4069
4070         nonseekable_open(inode, filp);
4071 out:
4072         mutex_unlock(&trace_types_lock);
4073         return ret;
4074
4075 fail:
4076         kfree(iter->trace);
4077         kfree(iter);
4078         __trace_array_put(tr);
4079         mutex_unlock(&trace_types_lock);
4080         return ret;
4081 }
4082
4083 static int tracing_release_pipe(struct inode *inode, struct file *file)
4084 {
4085         struct trace_iterator *iter = file->private_data;
4086         struct trace_array *tr = inode->i_private;
4087
4088         mutex_lock(&trace_types_lock);
4089
4090         if (iter->trace->pipe_close)
4091                 iter->trace->pipe_close(iter);
4092
4093         mutex_unlock(&trace_types_lock);
4094
4095         free_cpumask_var(iter->started);
4096         mutex_destroy(&iter->mutex);
4097         kfree(iter->trace);
4098         kfree(iter);
4099
4100         trace_array_put(tr);
4101
4102         return 0;
4103 }
4104
4105 static unsigned int
4106 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4107 {
4108         /* Iterators are static, they should be filled or empty */
4109         if (trace_buffer_iter(iter, iter->cpu_file))
4110                 return POLLIN | POLLRDNORM;
4111
4112         if (trace_flags & TRACE_ITER_BLOCK)
4113                 /*
4114                  * Always select as readable when in blocking mode
4115                  */
4116                 return POLLIN | POLLRDNORM;
4117         else
4118                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4119                                              filp, poll_table);
4120 }
4121
4122 static unsigned int
4123 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4124 {
4125         struct trace_iterator *iter = filp->private_data;
4126
4127         return trace_poll(iter, filp, poll_table);
4128 }
4129
4130 /*
4131  * This is a make-shift waitqueue.
4132  * A tracer might use this callback in some rare cases:
4133  *
4134  *  1) the current tracer might hold the runqueue lock when it wakes up
4135  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4136  *  2) the function tracers trace all functions; we don't want
4137  *     the overhead of calling wake_up and friends
4138  *     (and tracing them too)
4139  *
4140  *     Anyway, this is a very primitive wakeup.
4141  */
4142 void poll_wait_pipe(struct trace_iterator *iter)
4143 {
4144         set_current_state(TASK_INTERRUPTIBLE);
4145         /* sleep for 100 msecs, and try again. */
4146         schedule_timeout(HZ / 10);
4147 }
4148
4149 /* Must be called with iter->mutex held. */
4150 static int tracing_wait_pipe(struct file *filp)
4151 {
4152         struct trace_iterator *iter = filp->private_data;
4153
4154         while (trace_empty(iter)) {
4155
4156                 if ((filp->f_flags & O_NONBLOCK)) {
4157                         return -EAGAIN;
4158                 }
4159
4160                 mutex_unlock(&iter->mutex);
4161
4162                 iter->trace->wait_pipe(iter);
4163
4164                 mutex_lock(&iter->mutex);
4165
4166                 if (signal_pending(current))
4167                         return -EINTR;
4168
4169                 /*
4170                  * We block until we read something and tracing is disabled.
4171                  * We still block if tracing is disabled, but we have never
4172                  * read anything. This allows a user to cat this file, and
4173                  * then enable tracing. But after we have read something,
4174                  * we give an EOF when tracing is again disabled.
4175                  *
4176                  * iter->pos will be 0 if we haven't read anything.
4177                  */
4178                 if (!tracing_is_on() && iter->pos)
4179                         break;
4180         }
4181
4182         return 1;
4183 }
4184
4185 /*
4186  * Consumer reader.
4187  */
4188 static ssize_t
4189 tracing_read_pipe(struct file *filp, char __user *ubuf,
4190                   size_t cnt, loff_t *ppos)
4191 {
4192         struct trace_iterator *iter = filp->private_data;
4193         struct trace_array *tr = iter->tr;
4194         ssize_t sret;
4195
4196         /* return any leftover data */
4197         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4198         if (sret != -EBUSY)
4199                 return sret;
4200
4201         trace_seq_init(&iter->seq);
4202
4203         /* copy the tracer to avoid using a global lock all around */
4204         mutex_lock(&trace_types_lock);
4205         if (unlikely(iter->trace->name != tr->current_trace->name))
4206                 *iter->trace = *tr->current_trace;
4207         mutex_unlock(&trace_types_lock);
4208
4209         /*
4210          * Avoid more than one consumer on a single file descriptor.
4211          * This is just a matter of trace coherency; the ring buffer itself
4212          * is protected.
4213          */
4214         mutex_lock(&iter->mutex);
4215         if (iter->trace->read) {
4216                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4217                 if (sret)
4218                         goto out;
4219         }
4220
4221 waitagain:
4222         sret = tracing_wait_pipe(filp);
4223         if (sret <= 0)
4224                 goto out;
4225
4226         /* stop when tracing is finished */
4227         if (trace_empty(iter)) {
4228                 sret = 0;
4229                 goto out;
4230         }
4231
4232         if (cnt >= PAGE_SIZE)
4233                 cnt = PAGE_SIZE - 1;
4234
4235         /* reset all but tr, trace, and overruns */
4236         memset(&iter->seq, 0,
4237                sizeof(struct trace_iterator) -
4238                offsetof(struct trace_iterator, seq));
4239         cpumask_clear(iter->started);
4240         iter->pos = -1;
4241
4242         trace_event_read_lock();
4243         trace_access_lock(iter->cpu_file);
4244         while (trace_find_next_entry_inc(iter) != NULL) {
4245                 enum print_line_t ret;
4246                 int len = iter->seq.len;
4247
4248                 ret = print_trace_line(iter);
4249                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4250                         /* don't print partial lines */
4251                         iter->seq.len = len;
4252                         break;
4253                 }
4254                 if (ret != TRACE_TYPE_NO_CONSUME)
4255                         trace_consume(iter);
4256
4257                 if (iter->seq.len >= cnt)
4258                         break;
4259
4260                 /*
4261                  * Setting the full flag means we reached the trace_seq buffer
4262                  * size and should have left via the partial-output condition above.
4263                  * One of the trace_seq_* functions is not being used properly.
4264                  */
4265                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4266                           iter->ent->type);
4267         }
4268         trace_access_unlock(iter->cpu_file);
4269         trace_event_read_unlock();
4270
4271         /* Now copy what we have to the user */
4272         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4273         if (iter->seq.readpos >= iter->seq.len)
4274                 trace_seq_init(&iter->seq);
4275
4276         /*
4277          * If there was nothing to send to user, in spite of consuming trace
4278          * entries, go back to wait for more entries.
4279          */
4280         if (sret == -EBUSY)
4281                 goto waitagain;
4282
4283 out:
4284         mutex_unlock(&iter->mutex);
4285
4286         return sret;
4287 }
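
/*
 * Illustrative userspace sketch (not part of this file): trace_pipe is a
 * consuming, blocking read, as implemented above, so a reader typically
 * loops until it decides to stop. The debugfs path is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void drain_trace_pipe(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			;	// process n bytes of trace text here
 *		close(fd);
 *	}
 */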
4288
4289 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4290                                      unsigned int idx)
4291 {
4292         __free_page(spd->pages[idx]);
4293 }
4294
4295 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4296         .can_merge              = 0,
4297         .map                    = generic_pipe_buf_map,
4298         .unmap                  = generic_pipe_buf_unmap,
4299         .confirm                = generic_pipe_buf_confirm,
4300         .release                = generic_pipe_buf_release,
4301         .steal                  = generic_pipe_buf_steal,
4302         .get                    = generic_pipe_buf_get,
4303 };
4304
4305 static size_t
4306 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4307 {
4308         size_t count;
4309         int ret;
4310
4311         /* Seq buffer is page-sized, exactly what we need. */
4312         for (;;) {
4313                 count = iter->seq.len;
4314                 ret = print_trace_line(iter);
4315                 count = iter->seq.len - count;
4316                 if (rem < count) {
4317                         rem = 0;
4318                         iter->seq.len -= count;
4319                         break;
4320                 }
4321                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4322                         iter->seq.len -= count;
4323                         break;
4324                 }
4325
4326                 if (ret != TRACE_TYPE_NO_CONSUME)
4327                         trace_consume(iter);
4328                 rem -= count;
4329                 if (!trace_find_next_entry_inc(iter))   {
4330                         rem = 0;
4331                         iter->ent = NULL;
4332                         break;
4333                 }
4334         }
4335
4336         return rem;
4337 }
4338
4339 static ssize_t tracing_splice_read_pipe(struct file *filp,
4340                                         loff_t *ppos,
4341                                         struct pipe_inode_info *pipe,
4342                                         size_t len,
4343                                         unsigned int flags)
4344 {
4345         struct page *pages_def[PIPE_DEF_BUFFERS];
4346         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4347         struct trace_iterator *iter = filp->private_data;
4348         struct splice_pipe_desc spd = {
4349                 .pages          = pages_def,
4350                 .partial        = partial_def,
4351                 .nr_pages       = 0, /* This gets updated below. */
4352                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4353                 .flags          = flags,
4354                 .ops            = &tracing_pipe_buf_ops,
4355                 .spd_release    = tracing_spd_release_pipe,
4356         };
4357         struct trace_array *tr = iter->tr;
4358         ssize_t ret;
4359         size_t rem;
4360         unsigned int i;
4361
4362         if (splice_grow_spd(pipe, &spd))
4363                 return -ENOMEM;
4364
4365         /* copy the tracer to avoid using a global lock all around */
4366         mutex_lock(&trace_types_lock);
4367         if (unlikely(iter->trace->name != tr->current_trace->name))
4368                 *iter->trace = *tr->current_trace;
4369         mutex_unlock(&trace_types_lock);
4370
4371         mutex_lock(&iter->mutex);
4372
4373         if (iter->trace->splice_read) {
4374                 ret = iter->trace->splice_read(iter, filp,
4375                                                ppos, pipe, len, flags);
4376                 if (ret)
4377                         goto out_err;
4378         }
4379
4380         ret = tracing_wait_pipe(filp);
4381         if (ret <= 0)
4382                 goto out_err;
4383
4384         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4385                 ret = -EFAULT;
4386                 goto out_err;
4387         }
4388
4389         trace_event_read_lock();
4390         trace_access_lock(iter->cpu_file);
4391
4392         /* Fill as many pages as possible. */
4393         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4394                 spd.pages[i] = alloc_page(GFP_KERNEL);
4395                 if (!spd.pages[i])
4396                         break;
4397
4398                 rem = tracing_fill_pipe_page(rem, iter);
4399
4400                 /* Copy the data into the page, so we can start over. */
4401                 ret = trace_seq_to_buffer(&iter->seq,
4402                                           page_address(spd.pages[i]),
4403                                           iter->seq.len);
4404                 if (ret < 0) {
4405                         __free_page(spd.pages[i]);
4406                         break;
4407                 }
4408                 spd.partial[i].offset = 0;
4409                 spd.partial[i].len = iter->seq.len;
4410
4411                 trace_seq_init(&iter->seq);
4412         }
4413
4414         trace_access_unlock(iter->cpu_file);
4415         trace_event_read_unlock();
4416         mutex_unlock(&iter->mutex);
4417
4418         spd.nr_pages = i;
4419
4420         ret = splice_to_pipe(pipe, &spd);
4421 out:
4422         splice_shrink_spd(&spd);
4423         return ret;
4424
4425 out_err:
4426         mutex_unlock(&iter->mutex);
4427         goto out;
4428 }
4429
4430 static ssize_t
4431 tracing_entries_read(struct file *filp, char __user *ubuf,
4432                      size_t cnt, loff_t *ppos)
4433 {
4434         struct inode *inode = file_inode(filp);
4435         struct trace_array *tr = inode->i_private;
4436         int cpu = tracing_get_cpu(inode);
4437         char buf[64];
4438         int r = 0;
4439         ssize_t ret;
4440
4441         mutex_lock(&trace_types_lock);
4442
4443         if (cpu == RING_BUFFER_ALL_CPUS) {
4444                 int cpu, buf_size_same;
4445                 unsigned long size;
4446
4447                 size = 0;
4448                 buf_size_same = 1;
4449                 /* check if all cpu sizes are same */
4450                 for_each_tracing_cpu(cpu) {
4451                         /* fill in the size from first enabled cpu */
4452                         if (size == 0)
4453                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4454                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4455                                 buf_size_same = 0;
4456                                 break;
4457                         }
4458                 }
4459
4460                 if (buf_size_same) {
4461                         if (!ring_buffer_expanded)
4462                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4463                                             size >> 10,
4464                                             trace_buf_size >> 10);
4465                         else
4466                                 r = sprintf(buf, "%lu\n", size >> 10);
4467                 } else
4468                         r = sprintf(buf, "X\n");
4469         } else
4470                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4471
4472         mutex_unlock(&trace_types_lock);
4473
4474         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4475         return ret;
4476 }
4477
4478 static ssize_t
4479 tracing_entries_write(struct file *filp, const char __user *ubuf,
4480                       size_t cnt, loff_t *ppos)
4481 {
4482         struct inode *inode = file_inode(filp);
4483         struct trace_array *tr = inode->i_private;
4484         unsigned long val;
4485         int ret;
4486
4487         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4488         if (ret)
4489                 return ret;
4490
4491         /* must have at least 1 entry */
4492         if (!val)
4493                 return -EINVAL;
4494
4495         /* value is in KB */
4496         val <<= 10;
4497         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4498         if (ret < 0)
4499                 return ret;
4500
4501         *ppos += cnt;
4502
4503         return cnt;
4504 }
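
/*
 * Illustrative sketch (not part of this file): the value written to
 * buffer_size_kb is interpreted as kilobytes per CPU, matching the
 * "val <<= 10" conversion above. The debugfs path is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void set_buffer_size_4mb(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/buffer_size_kb",
 *			      O_WRONLY);
 *		if (fd >= 0) {
 *			write(fd, "4096", 4);	// 4096 KB per CPU
 *			close(fd);
 *		}
 *	}
 */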
4505
4506 static ssize_t
4507 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4508                                 size_t cnt, loff_t *ppos)
4509 {
4510         struct trace_array *tr = filp->private_data;
4511         char buf[64];
4512         int r, cpu;
4513         unsigned long size = 0, expanded_size = 0;
4514
4515         mutex_lock(&trace_types_lock);
4516         for_each_tracing_cpu(cpu) {
4517                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4518                 if (!ring_buffer_expanded)
4519                         expanded_size += trace_buf_size >> 10;
4520         }
4521         if (ring_buffer_expanded)
4522                 r = sprintf(buf, "%lu\n", size);
4523         else
4524                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4525         mutex_unlock(&trace_types_lock);
4526
4527         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4528 }
4529
4530 static ssize_t
4531 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4532                           size_t cnt, loff_t *ppos)
4533 {
4534         /*
4535          * There is no need to read what the user has written; this function
4536          * only exists so that using "echo" on this file does not return an error
4537          */
4538
4539         *ppos += cnt;
4540
4541         return cnt;
4542 }
4543
4544 static int
4545 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4546 {
4547         struct trace_array *tr = inode->i_private;
4548
4549         /* disable tracing? */
4550         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4551                 tracer_tracing_off(tr);
4552         /* resize the ring buffer to 0 */
4553         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4554
4555         trace_array_put(tr);
4556
4557         return 0;
4558 }
4559
4560 static ssize_t
4561 tracing_mark_write(struct file *filp, const char __user *ubuf,
4562                                         size_t cnt, loff_t *fpos)
4563 {
4564         unsigned long addr = (unsigned long)ubuf;
4565         struct trace_array *tr = filp->private_data;
4566         struct ring_buffer_event *event;
4567         struct ring_buffer *buffer;
4568         struct print_entry *entry;
4569         unsigned long irq_flags;
4570         struct page *pages[2];
4571         void *map_page[2];
4572         int nr_pages = 1;
4573         ssize_t written;
4574         int offset;
4575         int size;
4576         int len;
4577         int ret;
4578         int i;
4579
4580         if (tracing_disabled)
4581                 return -EINVAL;
4582
4583         if (!(trace_flags & TRACE_ITER_MARKERS))
4584                 return -EINVAL;
4585
4586         if (cnt > TRACE_BUF_SIZE)
4587                 cnt = TRACE_BUF_SIZE;
4588
4589         /*
4590          * Userspace is injecting traces into the kernel trace buffer.
4591          * We want to be as non-intrusive as possible.
4592          * To do so, we do not want to allocate any special buffers
4593          * or take any locks, but instead write the userspace data
4594          * straight into the ring buffer.
4595          *
4596          * First we need to pin the userspace buffer into memory,
4597          * which it most likely already is, because userspace just referenced it.
4598          * But there's no guarantee that it is. By using get_user_pages_fast()
4599          * and kmap_atomic/kunmap_atomic() we can get access to the
4600          * pages directly. We then write the data directly into the
4601          * ring buffer.
4602          */
4603         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4604
4605         /* check if we cross pages */
4606         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4607                 nr_pages = 2;
4608
4609         offset = addr & (PAGE_SIZE - 1);
4610         addr &= PAGE_MASK;
4611
4612         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4613         if (ret < nr_pages) {
4614                 while (--ret >= 0)
4615                         put_page(pages[ret]);
4616                 written = -EFAULT;
4617                 goto out;
4618         }
4619
4620         for (i = 0; i < nr_pages; i++)
4621                 map_page[i] = kmap_atomic(pages[i]);
4622
4623         local_save_flags(irq_flags);
4624         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4625         buffer = tr->trace_buffer.buffer;
4626         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4627                                           irq_flags, preempt_count());
4628         if (!event) {
4629                 /* Ring buffer disabled, return as if not open for write */
4630                 written = -EBADF;
4631                 goto out_unlock;
4632         }
4633
4634         entry = ring_buffer_event_data(event);
4635         entry->ip = _THIS_IP_;
4636
4637         if (nr_pages == 2) {
4638                 len = PAGE_SIZE - offset;
4639                 memcpy(&entry->buf, map_page[0] + offset, len);
4640                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4641         } else
4642                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4643
4644         if (entry->buf[cnt - 1] != '\n') {
4645                 entry->buf[cnt] = '\n';
4646                 entry->buf[cnt + 1] = '\0';
4647         } else
4648                 entry->buf[cnt] = '\0';
4649
4650         __buffer_unlock_commit(buffer, event);
4651
4652         written = cnt;
4653
4654         *fpos += written;
4655
4656  out_unlock:
4657         for (i = 0; i < nr_pages; i++){
4658                 kunmap_atomic(map_page[i]);
4659                 put_page(pages[i]);
4660         }
4661  out:
4662         return written;
4663 }
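
/*
 * Illustrative userspace sketch (not part of this file): annotating a trace
 * by writing into trace_marker, which reaches the ring buffer through the
 * function above. The debugfs mount point is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void trace_mark(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_marker",
 *			      O_WRONLY);
 *		if (fd >= 0) {
 *			// one write() per message; a trailing '\n' is added
 *			// by the kernel if the message does not end with one
 *			write(fd, msg, strlen(msg));
 *			close(fd);
 *		}
 *	}
 */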
4664
4665 static int tracing_clock_show(struct seq_file *m, void *v)
4666 {
4667         struct trace_array *tr = m->private;
4668         int i;
4669
4670         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4671                 seq_printf(m,
4672                         "%s%s%s%s", i ? " " : "",
4673                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4674                         i == tr->clock_id ? "]" : "");
4675         seq_putc(m, '\n');
4676
4677         return 0;
4678 }
4679
4680 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4681                                    size_t cnt, loff_t *fpos)
4682 {
4683         struct seq_file *m = filp->private_data;
4684         struct trace_array *tr = m->private;
4685         char buf[64];
4686         const char *clockstr;
4687         int i;
4688
4689         if (cnt >= sizeof(buf))
4690                 return -EINVAL;
4691
4692         if (copy_from_user(&buf, ubuf, cnt))
4693                 return -EFAULT;
4694
4695         buf[cnt] = 0;
4696
4697         clockstr = strstrip(buf);
4698
4699         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4700                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4701                         break;
4702         }
4703         if (i == ARRAY_SIZE(trace_clocks))
4704                 return -EINVAL;
4705
4706         mutex_lock(&trace_types_lock);
4707
4708         tr->clock_id = i;
4709
4710         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4711
4712         /*
4713          * New clock may not be consistent with the previous clock.
4714          * Reset the buffer so that it doesn't have incomparable timestamps.
4715          */
4716         tracing_reset_online_cpus(&tr->trace_buffer);
4717
4718 #ifdef CONFIG_TRACER_MAX_TRACE
4719         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4720                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4721         tracing_reset_online_cpus(&tr->max_buffer);
4722 #endif
4723
4724         mutex_unlock(&trace_types_lock);
4725
4726         *fpos += cnt;
4727
4728         return cnt;
4729 }
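
/*
 * Illustrative sketch (not part of this file): selecting the cross-CPU
 * "global" clock by writing its name to trace_clock. Note the buffer reset
 * above: existing events are dropped when the clock changes. The debugfs
 * path is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void use_global_trace_clock(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_clock", O_WRONLY);
 *		if (fd >= 0) {
 *			write(fd, "global", 6);
 *			close(fd);
 *		}
 *	}
 */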
4730
4731 static int tracing_clock_open(struct inode *inode, struct file *file)
4732 {
4733         struct trace_array *tr = inode->i_private;
4734         int ret;
4735
4736         if (tracing_disabled)
4737                 return -ENODEV;
4738
4739         if (trace_array_get(tr))
4740                 return -ENODEV;
4741
4742         ret = single_open(file, tracing_clock_show, inode->i_private);
4743         if (ret < 0)
4744                 trace_array_put(tr);
4745
4746         return ret;
4747 }
4748
4749 struct ftrace_buffer_info {
4750         struct trace_iterator   iter;
4751         void                    *spare;
4752         unsigned int            read;
4753 };
4754
4755 #ifdef CONFIG_TRACER_SNAPSHOT
4756 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4757 {
4758         struct trace_array *tr = inode->i_private;
4759         struct trace_iterator *iter;
4760         struct seq_file *m;
4761         int ret = 0;
4762
4763         if (trace_array_get(tr) < 0)
4764                 return -ENODEV;
4765
4766         if (file->f_mode & FMODE_READ) {
4767                 iter = __tracing_open(inode, file, true);
4768                 if (IS_ERR(iter))
4769                         ret = PTR_ERR(iter);
4770         } else {
4771                 /* Writes still need the seq_file to hold the private data */
4772                 ret = -ENOMEM;
4773                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4774                 if (!m)
4775                         goto out;
4776                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4777                 if (!iter) {
4778                         kfree(m);
4779                         goto out;
4780                 }
4781                 ret = 0;
4782
4783                 iter->tr = tr;
4784                 iter->trace_buffer = &tr->max_buffer;
4785                 iter->cpu_file = tracing_get_cpu(inode);
4786                 m->private = iter;
4787                 file->private_data = m;
4788         }
4789 out:
4790         if (ret < 0)
4791                 trace_array_put(tr);
4792
4793         return ret;
4794 }
4795
4796 static ssize_t
4797 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4798                        loff_t *ppos)
4799 {
4800         struct seq_file *m = filp->private_data;
4801         struct trace_iterator *iter = m->private;
4802         struct trace_array *tr = iter->tr;
4803         unsigned long val;
4804         int ret;
4805
4806         ret = tracing_update_buffers();
4807         if (ret < 0)
4808                 return ret;
4809
4810         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4811         if (ret)
4812                 return ret;
4813
4814         mutex_lock(&trace_types_lock);
4815
4816         if (tr->current_trace->use_max_tr) {
4817                 ret = -EBUSY;
4818                 goto out;
4819         }
4820
4821         switch (val) {
4822         case 0:
4823                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4824                         ret = -EINVAL;
4825                         break;
4826                 }
4827                 if (tr->allocated_snapshot)
4828                         free_snapshot(tr);
4829                 break;
4830         case 1:
4831 /* Only allow per-cpu swap if the ring buffer supports it */
4832 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4833                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4834                         ret = -EINVAL;
4835                         break;
4836                 }
4837 #endif
4838                 if (!tr->allocated_snapshot) {
4839                         ret = alloc_snapshot(tr);
4840                         if (ret < 0)
4841                                 break;
4842                 }
4843                 local_irq_disable();
4844                 /* Now, we're going to swap */
4845                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4846                         update_max_tr(tr, current, smp_processor_id());
4847                 else
4848                         update_max_tr_single(tr, current, iter->cpu_file);
4849                 local_irq_enable();
4850                 break;
4851         default:
4852                 if (tr->allocated_snapshot) {
4853                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4854                                 tracing_reset_online_cpus(&tr->max_buffer);
4855                         else
4856                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4857                 }
4858                 break;
4859         }
4860
4861         if (ret >= 0) {
4862                 *ppos += cnt;
4863                 ret = cnt;
4864         }
4865 out:
4866         mutex_unlock(&trace_types_lock);
4867         return ret;
4868 }
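
/*
 * Usage sketch for the snapshot file created from snapshot_fops below,
 * assuming debugfs is mounted at /sys/kernel/debug.  The value written
 * maps onto the switch statement in tracing_snapshot_write() above:
 *
 *   # cd /sys/kernel/debug/tracing
 *   # echo 1 > snapshot      (allocate if needed, then swap in a snapshot)
 *   # cat snapshot           (read out the snapshotted data)
 *   # echo 2 > snapshot      (any other value clears the snapshot contents)
 *   # echo 0 > snapshot      (free the snapshot buffer)
 */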
4869
4870 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4871 {
4872         struct seq_file *m = file->private_data;
4873         int ret;
4874
4875         ret = tracing_release(inode, file);
4876
4877         if (file->f_mode & FMODE_READ)
4878                 return ret;
4879
4880         /* If write only, the seq_file is just a stub */
4881         if (m)
4882                 kfree(m->private);
4883         kfree(m);
4884
4885         return 0;
4886 }
4887
4888 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4889 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4890                                     size_t count, loff_t *ppos);
4891 static int tracing_buffers_release(struct inode *inode, struct file *file);
4892 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4893                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4894
4895 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4896 {
4897         struct ftrace_buffer_info *info;
4898         int ret;
4899
4900         ret = tracing_buffers_open(inode, filp);
4901         if (ret < 0)
4902                 return ret;
4903
4904         info = filp->private_data;
4905
4906         if (info->iter.trace->use_max_tr) {
4907                 tracing_buffers_release(inode, filp);
4908                 return -EBUSY;
4909         }
4910
4911         info->iter.snapshot = true;
4912         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4913
4914         return ret;
4915 }
4916
4917 #endif /* CONFIG_TRACER_SNAPSHOT */
4918
4919
4920 static const struct file_operations tracing_max_lat_fops = {
4921         .open           = tracing_open_generic,
4922         .read           = tracing_max_lat_read,
4923         .write          = tracing_max_lat_write,
4924         .llseek         = generic_file_llseek,
4925 };
4926
4927 static const struct file_operations set_tracer_fops = {
4928         .open           = tracing_open_generic,
4929         .read           = tracing_set_trace_read,
4930         .write          = tracing_set_trace_write,
4931         .llseek         = generic_file_llseek,
4932 };
4933
4934 static const struct file_operations tracing_pipe_fops = {
4935         .open           = tracing_open_pipe,
4936         .poll           = tracing_poll_pipe,
4937         .read           = tracing_read_pipe,
4938         .splice_read    = tracing_splice_read_pipe,
4939         .release        = tracing_release_pipe,
4940         .llseek         = no_llseek,
4941 };
4942
4943 static const struct file_operations tracing_entries_fops = {
4944         .open           = tracing_open_generic_tr,
4945         .read           = tracing_entries_read,
4946         .write          = tracing_entries_write,
4947         .llseek         = generic_file_llseek,
4948         .release        = tracing_release_generic_tr,
4949 };
4950
4951 static const struct file_operations tracing_total_entries_fops = {
4952         .open           = tracing_open_generic_tr,
4953         .read           = tracing_total_entries_read,
4954         .llseek         = generic_file_llseek,
4955         .release        = tracing_release_generic_tr,
4956 };
4957
4958 static const struct file_operations tracing_free_buffer_fops = {
4959         .open           = tracing_open_generic_tr,
4960         .write          = tracing_free_buffer_write,
4961         .release        = tracing_free_buffer_release,
4962 };
4963
4964 static const struct file_operations tracing_mark_fops = {
4965         .open           = tracing_open_generic_tr,
4966         .write          = tracing_mark_write,
4967         .llseek         = generic_file_llseek,
4968         .release        = tracing_release_generic_tr,
4969 };
4970
4971 static const struct file_operations trace_clock_fops = {
4972         .open           = tracing_clock_open,
4973         .read           = seq_read,
4974         .llseek         = seq_lseek,
4975         .release        = tracing_single_release_tr,
4976         .write          = tracing_clock_write,
4977 };
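
/*
 * Usage sketch, assuming debugfs is mounted at /sys/kernel/debug: the
 * trace_clock file selects which trace_clocks[] entry timestamps new
 * events.  Reading it lists the available clocks (the current one is
 * marked); writing a clock name switches to it and, as tracing_clock_write()
 * above does, resets the buffers so timestamps stay comparable:
 *
 *   # cat /sys/kernel/debug/tracing/trace_clock
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 */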
4978
4979 #ifdef CONFIG_TRACER_SNAPSHOT
4980 static const struct file_operations snapshot_fops = {
4981         .open           = tracing_snapshot_open,
4982         .read           = seq_read,
4983         .write          = tracing_snapshot_write,
4984         .llseek         = tracing_lseek,
4985         .release        = tracing_snapshot_release,
4986 };
4987
4988 static const struct file_operations snapshot_raw_fops = {
4989         .open           = snapshot_raw_open,
4990         .read           = tracing_buffers_read,
4991         .release        = tracing_buffers_release,
4992         .splice_read    = tracing_buffers_splice_read,
4993         .llseek         = no_llseek,
4994 };
4995
4996 #endif /* CONFIG_TRACER_SNAPSHOT */
4997
4998 static int tracing_buffers_open(struct inode *inode, struct file *filp)
4999 {
5000         struct trace_array *tr = inode->i_private;
5001         struct ftrace_buffer_info *info;
5002         int ret;
5003
5004         if (tracing_disabled)
5005                 return -ENODEV;
5006
5007         if (trace_array_get(tr) < 0)
5008                 return -ENODEV;
5009
5010         info = kzalloc(sizeof(*info), GFP_KERNEL);
5011         if (!info) {
5012                 trace_array_put(tr);
5013                 return -ENOMEM;
5014         }
5015
5016         mutex_lock(&trace_types_lock);
5017
5018         info->iter.tr           = tr;
5019         info->iter.cpu_file     = tracing_get_cpu(inode);
5020         info->iter.trace        = tr->current_trace;
5021         info->iter.trace_buffer = &tr->trace_buffer;
5022         info->spare             = NULL;
5023         /* Force reading ring buffer for first read */
5024         info->read              = (unsigned int)-1;
5025
5026         filp->private_data = info;
5027
5028         mutex_unlock(&trace_types_lock);
5029
5030         ret = nonseekable_open(inode, filp);
5031         if (ret < 0)
5032                 trace_array_put(tr);
5033
5034         return ret;
5035 }
5036
5037 static unsigned int
5038 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5039 {
5040         struct ftrace_buffer_info *info = filp->private_data;
5041         struct trace_iterator *iter = &info->iter;
5042
5043         return trace_poll(iter, filp, poll_table);
5044 }
5045
5046 static ssize_t
5047 tracing_buffers_read(struct file *filp, char __user *ubuf,
5048                      size_t count, loff_t *ppos)
5049 {
5050         struct ftrace_buffer_info *info = filp->private_data;
5051         struct trace_iterator *iter = &info->iter;
5052         ssize_t ret;
5053         ssize_t size;
5054
5055         if (!count)
5056                 return 0;
5057
5058         mutex_lock(&trace_types_lock);
5059
5060 #ifdef CONFIG_TRACER_MAX_TRACE
5061         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5062                 size = -EBUSY;
5063                 goto out_unlock;
5064         }
5065 #endif
5066
5067         if (!info->spare)
5068                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5069                                                           iter->cpu_file);
5070         size = -ENOMEM;
5071         if (!info->spare)
5072                 goto out_unlock;
5073
5074         /* Is there leftover data from a previous read? */
5075         if (info->read < PAGE_SIZE)
5076                 goto read;
5077
5078  again:
5079         trace_access_lock(iter->cpu_file);
5080         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5081                                     &info->spare,
5082                                     count,
5083                                     iter->cpu_file, 0);
5084         trace_access_unlock(iter->cpu_file);
5085
5086         if (ret < 0) {
5087                 if (trace_empty(iter)) {
5088                         if ((filp->f_flags & O_NONBLOCK)) {
5089                                 size = -EAGAIN;
5090                                 goto out_unlock;
5091                         }
5092                         mutex_unlock(&trace_types_lock);
5093                         iter->trace->wait_pipe(iter);
5094                         mutex_lock(&trace_types_lock);
5095                         if (signal_pending(current)) {
5096                                 size = -EINTR;
5097                                 goto out_unlock;
5098                         }
5099                         goto again;
5100                 }
5101                 size = 0;
5102                 goto out_unlock;
5103         }
5104
5105         info->read = 0;
5106  read:
5107         size = PAGE_SIZE - info->read;
5108         if (size > count)
5109                 size = count;
5110
5111         ret = copy_to_user(ubuf, info->spare + info->read, size);
5112         if (ret == size) {
5113                 size = -EFAULT;
5114                 goto out_unlock;
5115         }
5116         size -= ret;
5117
5118         *ppos += size;
5119         info->read += size;
5120
5121  out_unlock:
5122         mutex_unlock(&trace_types_lock);
5123
5124         return size;
5125 }
5126
5127 static int tracing_buffers_release(struct inode *inode, struct file *file)
5128 {
5129         struct ftrace_buffer_info *info = file->private_data;
5130         struct trace_iterator *iter = &info->iter;
5131
5132         mutex_lock(&trace_types_lock);
5133
5134         __trace_array_put(iter->tr);
5135
5136         if (info->spare)
5137                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5138         kfree(info);
5139
5140         mutex_unlock(&trace_types_lock);
5141
5142         return 0;
5143 }
5144
5145 struct buffer_ref {
5146         struct ring_buffer      *buffer;
5147         void                    *page;
5148         int                     ref;
5149 };
5150
5151 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5152                                     struct pipe_buffer *buf)
5153 {
5154         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5155
5156         if (--ref->ref)
5157                 return;
5158
5159         ring_buffer_free_read_page(ref->buffer, ref->page);
5160         kfree(ref);
5161         buf->private = 0;
5162 }
5163
5164 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5165                                 struct pipe_buffer *buf)
5166 {
5167         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5168
5169         ref->ref++;
5170 }
5171
5172 /* Pipe buffer operations for a ring buffer read page. */
5173 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5174         .can_merge              = 0,
5175         .map                    = generic_pipe_buf_map,
5176         .unmap                  = generic_pipe_buf_unmap,
5177         .confirm                = generic_pipe_buf_confirm,
5178         .release                = buffer_pipe_buf_release,
5179         .steal                  = generic_pipe_buf_steal,
5180         .get                    = buffer_pipe_buf_get,
5181 };
5182
5183 /*
5184  * Callback from splice_to_pipe(): release any pages still held in the
5185  * spd if we errored out while filling the pipe.
5186  */
5187 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5188 {
5189         struct buffer_ref *ref =
5190                 (struct buffer_ref *)spd->partial[i].private;
5191
5192         if (--ref->ref)
5193                 return;
5194
5195         ring_buffer_free_read_page(ref->buffer, ref->page);
5196         kfree(ref);
5197         spd->partial[i].private = 0;
5198 }
5199
5200 static ssize_t
5201 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5202                             struct pipe_inode_info *pipe, size_t len,
5203                             unsigned int flags)
5204 {
5205         struct ftrace_buffer_info *info = file->private_data;
5206         struct trace_iterator *iter = &info->iter;
5207         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5208         struct page *pages_def[PIPE_DEF_BUFFERS];
5209         struct splice_pipe_desc spd = {
5210                 .pages          = pages_def,
5211                 .partial        = partial_def,
5212                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5213                 .flags          = flags,
5214                 .ops            = &buffer_pipe_buf_ops,
5215                 .spd_release    = buffer_spd_release,
5216         };
5217         struct buffer_ref *ref;
5218         int entries, size, i;
5219         ssize_t ret;
5220
5221         mutex_lock(&trace_types_lock);
5222
5223 #ifdef CONFIG_TRACER_MAX_TRACE
5224         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5225                 ret = -EBUSY;
5226                 goto out;
5227         }
5228 #endif
5229
5230         if (splice_grow_spd(pipe, &spd)) {
5231                 ret = -ENOMEM;
5232                 goto out;
5233         }
5234
5235         if (*ppos & (PAGE_SIZE - 1)) {
5236                 ret = -EINVAL;
5237                 goto out;
5238         }
5239
5240         if (len & (PAGE_SIZE - 1)) {
5241                 if (len < PAGE_SIZE) {
5242                         ret = -EINVAL;
5243                         goto out;
5244                 }
5245                 len &= PAGE_MASK;
5246         }
5247
5248  again:
5249         trace_access_lock(iter->cpu_file);
5250         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5251
5252         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5253                 struct page *page;
5254                 int r;
5255
5256                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5257                 if (!ref)
5258                         break;
5259
5260                 ref->ref = 1;
5261                 ref->buffer = iter->trace_buffer->buffer;
5262                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5263                 if (!ref->page) {
5264                         kfree(ref);
5265                         break;
5266                 }
5267
5268                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5269                                           len, iter->cpu_file, 1);
5270                 if (r < 0) {
5271                         ring_buffer_free_read_page(ref->buffer, ref->page);
5272                         kfree(ref);
5273                         break;
5274                 }
5275
5276                 /*
5277                  * Zero out any leftover data; this page is going to
5278                  * userland.
5279                  */
5280                 size = ring_buffer_page_len(ref->page);
5281                 if (size < PAGE_SIZE)
5282                         memset(ref->page + size, 0, PAGE_SIZE - size);
5283
5284                 page = virt_to_page(ref->page);
5285
5286                 spd.pages[i] = page;
5287                 spd.partial[i].len = PAGE_SIZE;
5288                 spd.partial[i].offset = 0;
5289                 spd.partial[i].private = (unsigned long)ref;
5290                 spd.nr_pages++;
5291                 *ppos += PAGE_SIZE;
5292
5293                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5294         }
5295
5296         trace_access_unlock(iter->cpu_file);
5297         spd.nr_pages = i;
5298
5299         /* did we read anything? */
5300         if (!spd.nr_pages) {
5301                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5302                         ret = -EAGAIN;
5303                         goto out;
5304                 }
5305                 mutex_unlock(&trace_types_lock);
5306                 iter->trace->wait_pipe(iter);
5307                 mutex_lock(&trace_types_lock);
5308                 if (signal_pending(current)) {
5309                         ret = -EINTR;
5310                         goto out;
5311                 }
5312                 goto again;
5313         }
5314
5315         ret = splice_to_pipe(pipe, &spd);
5316         splice_shrink_spd(&spd);
5317 out:
5318         mutex_unlock(&trace_types_lock);
5319
5320         return ret;
5321 }
5322
5323 static const struct file_operations tracing_buffers_fops = {
5324         .open           = tracing_buffers_open,
5325         .read           = tracing_buffers_read,
5326         .poll           = tracing_buffers_poll,
5327         .release        = tracing_buffers_release,
5328         .splice_read    = tracing_buffers_splice_read,
5329         .llseek         = no_llseek,
5330 };
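
/*
 * The per_cpu/cpuN/trace_pipe_raw files created below use these fops.
 * They hand ring buffer pages to userspace in page-sized binary chunks,
 * via read() or splice().  A rough capture sketch, assuming debugfs at
 * /sys/kernel/debug and a 4K page size:
 *
 *   # dd if=/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw \
 *        of=cpu0.raw bs=4096
 */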
5331
5332 static ssize_t
5333 tracing_stats_read(struct file *filp, char __user *ubuf,
5334                    size_t count, loff_t *ppos)
5335 {
5336         struct inode *inode = file_inode(filp);
5337         struct trace_array *tr = inode->i_private;
5338         struct trace_buffer *trace_buf = &tr->trace_buffer;
5339         int cpu = tracing_get_cpu(inode);
5340         struct trace_seq *s;
5341         unsigned long cnt;
5342         unsigned long long t;
5343         unsigned long usec_rem;
5344
5345         s = kmalloc(sizeof(*s), GFP_KERNEL);
5346         if (!s)
5347                 return -ENOMEM;
5348
5349         trace_seq_init(s);
5350
5351         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5352         trace_seq_printf(s, "entries: %ld\n", cnt);
5353
5354         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5355         trace_seq_printf(s, "overrun: %ld\n", cnt);
5356
5357         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5358         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5359
5360         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5361         trace_seq_printf(s, "bytes: %ld\n", cnt);
5362
5363         if (trace_clocks[tr->clock_id].in_ns) {
5364                 /* local or global for trace_clock */
5365                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5366                 usec_rem = do_div(t, USEC_PER_SEC);
5367                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5368                                                                 t, usec_rem);
5369
5370                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5371                 usec_rem = do_div(t, USEC_PER_SEC);
5372                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5373         } else {
5374                 /* counter or tsc mode for trace_clock */
5375                 trace_seq_printf(s, "oldest event ts: %llu\n",
5376                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5377
5378                 trace_seq_printf(s, "now ts: %llu\n",
5379                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5380         }
5381
5382         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5383         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5384
5385         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5386         trace_seq_printf(s, "read events: %ld\n", cnt);
5387
5388         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5389
5390         kfree(s);
5391
5392         return count;
5393 }
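
/*
 * The per_cpu/cpuN/stats files produced by tracing_stats_read() above
 * emit one "name: value" line per counter, roughly:
 *
 *   entries: ...
 *   overrun: ...
 *   commit overrun: ...
 *   bytes: ...
 *   oldest event ts: ...
 *   now ts: ...
 *   dropped events: ...
 *   read events: ...
 */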
5394
5395 static const struct file_operations tracing_stats_fops = {
5396         .open           = tracing_open_generic_tr,
5397         .read           = tracing_stats_read,
5398         .llseek         = generic_file_llseek,
5399         .release        = tracing_release_generic_tr,
5400 };
5401
5402 #ifdef CONFIG_DYNAMIC_FTRACE
5403
5404 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5405 {
5406         return 0;
5407 }
5408
5409 static ssize_t
5410 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5411                   size_t cnt, loff_t *ppos)
5412 {
5413         static char ftrace_dyn_info_buffer[1024];
5414         static DEFINE_MUTEX(dyn_info_mutex);
5415         unsigned long *p = filp->private_data;
5416         char *buf = ftrace_dyn_info_buffer;
5417         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5418         int r;
5419
5420         mutex_lock(&dyn_info_mutex);
5421         r = sprintf(buf, "%ld ", *p);
5422
5423         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5424         buf[r++] = '\n';
5425
5426         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5427
5428         mutex_unlock(&dyn_info_mutex);
5429
5430         return r;
5431 }
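
/*
 * tracing_read_dyn_info() backs the dyn_ftrace_total_info file (wired up
 * in tracer_init_debugfs() with &ftrace_update_tot_cnt as private data).
 * Reading it prints that count, optionally followed by arch-specific
 * info from ftrace_arch_read_dyn_info():
 *
 *   # cat /sys/kernel/debug/tracing/dyn_ftrace_total_info
 */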
5432
5433 static const struct file_operations tracing_dyn_info_fops = {
5434         .open           = tracing_open_generic,
5435         .read           = tracing_read_dyn_info,
5436         .llseek         = generic_file_llseek,
5437 };
5438 #endif /* CONFIG_DYNAMIC_FTRACE */
5439
5440 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5441 static void
5442 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5443 {
5444         tracing_snapshot();
5445 }
5446
5447 static void
5448 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5449 {
5450         unsigned long *count = (unsigned long *)data;
5451
5452         if (!*count)
5453                 return;
5454
5455         if (*count != -1)
5456                 (*count)--;
5457
5458         tracing_snapshot();
5459 }
5460
5461 static int
5462 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5463                       struct ftrace_probe_ops *ops, void *data)
5464 {
5465         long count = (long)data;
5466
5467         seq_printf(m, "%ps:", (void *)ip);
5468
5469         seq_printf(m, "snapshot");
5470
5471         if (count == -1)
5472                 seq_printf(m, ":unlimited\n");
5473         else
5474                 seq_printf(m, ":count=%ld\n", count);
5475
5476         return 0;
5477 }
5478
5479 static struct ftrace_probe_ops snapshot_probe_ops = {
5480         .func                   = ftrace_snapshot,
5481         .print                  = ftrace_snapshot_print,
5482 };
5483
5484 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5485         .func                   = ftrace_count_snapshot,
5486         .print                  = ftrace_snapshot_print,
5487 };
5488
5489 static int
5490 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5491                                char *glob, char *cmd, char *param, int enable)
5492 {
5493         struct ftrace_probe_ops *ops;
5494         void *count = (void *)-1;
5495         char *number;
5496         int ret;
5497
5498         /* hash funcs only work with set_ftrace_filter */
5499         if (!enable)
5500                 return -EINVAL;
5501
5502         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5503
5504         if (glob[0] == '!') {
5505                 unregister_ftrace_function_probe_func(glob+1, ops);
5506                 return 0;
5507         }
5508
5509         if (!param)
5510                 goto out_reg;
5511
5512         number = strsep(&param, ":");
5513
5514         if (!strlen(number))
5515                 goto out_reg;
5516
5517         /*
5518          * We use the callback data field (which is a pointer)
5519          * as our counter.
5520          */
5521         ret = kstrtoul(number, 0, (unsigned long *)&count);
5522         if (ret)
5523                 return ret;
5524
5525  out_reg:
5526         ret = register_ftrace_function_probe(glob, ops, count);
5527
5528         if (ret >= 0)
5529                 alloc_snapshot(&global_trace);
5530
5531         return ret < 0 ? ret : 0;
5532 }
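
/*
 * ftrace_trace_snapshot_callback() above implements the "snapshot"
 * command registered below.  A usage sketch via set_ftrace_filter,
 * assuming debugfs at /sys/kernel/debug/tracing:
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter     (snapshot on every hit)
 *   # echo 'schedule:snapshot:3' > set_ftrace_filter   (only the first 3 hits)
 *   # echo '!schedule:snapshot' > set_ftrace_filter    (remove the probe)
 */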
5533
5534 static struct ftrace_func_command ftrace_snapshot_cmd = {
5535         .name                   = "snapshot",
5536         .func                   = ftrace_trace_snapshot_callback,
5537 };
5538
5539 static __init int register_snapshot_cmd(void)
5540 {
5541         return register_ftrace_command(&ftrace_snapshot_cmd);
5542 }
5543 #else
5544 static inline __init int register_snapshot_cmd(void) { return 0; }
5545 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5546
5547 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5548 {
5549         if (tr->dir)
5550                 return tr->dir;
5551
5552         if (!debugfs_initialized())
5553                 return NULL;
5554
5555         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5556                 tr->dir = debugfs_create_dir("tracing", NULL);
5557
5558         if (!tr->dir)
5559                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5560
5561         return tr->dir;
5562 }
5563
5564 struct dentry *tracing_init_dentry(void)
5565 {
5566         return tracing_init_dentry_tr(&global_trace);
5567 }
5568
5569 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5570 {
5571         struct dentry *d_tracer;
5572
5573         if (tr->percpu_dir)
5574                 return tr->percpu_dir;
5575
5576         d_tracer = tracing_init_dentry_tr(tr);
5577         if (!d_tracer)
5578                 return NULL;
5579
5580         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5581
5582         WARN_ONCE(!tr->percpu_dir,
5583                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5584
5585         return tr->percpu_dir;
5586 }
5587
5588 static struct dentry *
5589 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5590                       void *data, long cpu, const struct file_operations *fops)
5591 {
5592         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5593
5594         if (ret) /* See tracing_get_cpu() */
5595                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5596         return ret;
5597 }
5598
5599 static void
5600 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5601 {
5602         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5603         struct dentry *d_cpu;
5604         char cpu_dir[30]; /* 30 characters should be more than enough */
5605
5606         if (!d_percpu)
5607                 return;
5608
5609         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5610         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5611         if (!d_cpu) {
5612                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5613                 return;
5614         }
5615
5616         /* per cpu trace_pipe */
5617         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5618                                 tr, cpu, &tracing_pipe_fops);
5619
5620         /* per cpu trace */
5621         trace_create_cpu_file("trace", 0644, d_cpu,
5622                                 tr, cpu, &tracing_fops);
5623
5624         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5625                                 tr, cpu, &tracing_buffers_fops);
5626
5627         trace_create_cpu_file("stats", 0444, d_cpu,
5628                                 tr, cpu, &tracing_stats_fops);
5629
5630         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5631                                 tr, cpu, &tracing_entries_fops);
5632
5633 #ifdef CONFIG_TRACER_SNAPSHOT
5634         trace_create_cpu_file("snapshot", 0644, d_cpu,
5635                                 tr, cpu, &snapshot_fops);
5636
5637         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5638                                 tr, cpu, &snapshot_raw_fops);
5639 #endif
5640 }
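
/*
 * The result is one per_cpu/cpu<N>/ directory per tracing CPU, holding
 * trace, trace_pipe, trace_pipe_raw, stats, buffer_size_kb and, with
 * CONFIG_TRACER_SNAPSHOT, snapshot and snapshot_raw.
 */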
5641
5642 #ifdef CONFIG_FTRACE_SELFTEST
5643 /* Let selftest have access to static functions in this file */
5644 #include "trace_selftest.c"
5645 #endif
5646
5647 struct trace_option_dentry {
5648         struct tracer_opt               *opt;
5649         struct tracer_flags             *flags;
5650         struct trace_array              *tr;
5651         struct dentry                   *entry;
5652 };
5653
5654 static ssize_t
5655 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5656                         loff_t *ppos)
5657 {
5658         struct trace_option_dentry *topt = filp->private_data;
5659         char *buf;
5660
5661         if (topt->flags->val & topt->opt->bit)
5662                 buf = "1\n";
5663         else
5664                 buf = "0\n";
5665
5666         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5667 }
5668
5669 static ssize_t
5670 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5671                          loff_t *ppos)
5672 {
5673         struct trace_option_dentry *topt = filp->private_data;
5674         unsigned long val;
5675         int ret;
5676
5677         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5678         if (ret)
5679                 return ret;
5680
5681         if (val != 0 && val != 1)
5682                 return -EINVAL;
5683
5684         if (!!(topt->flags->val & topt->opt->bit) != val) {
5685                 mutex_lock(&trace_types_lock);
5686                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5687                                           topt->opt, !val);
5688                 mutex_unlock(&trace_types_lock);
5689                 if (ret)
5690                         return ret;
5691         }
5692
5693         *ppos += cnt;
5694
5695         return cnt;
5696 }
5697
5698
5699 static const struct file_operations trace_options_fops = {
5700         .open = tracing_open_generic,
5701         .read = trace_options_read,
5702         .write = trace_options_write,
5703         .llseek = generic_file_llseek,
5704 };
5705
5706 static ssize_t
5707 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5708                         loff_t *ppos)
5709 {
5710         long index = (long)filp->private_data;
5711         char *buf;
5712
5713         if (trace_flags & (1 << index))
5714                 buf = "1\n";
5715         else
5716                 buf = "0\n";
5717
5718         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5719 }
5720
5721 static ssize_t
5722 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5723                          loff_t *ppos)
5724 {
5725         struct trace_array *tr = &global_trace;
5726         long index = (long)filp->private_data;
5727         unsigned long val;
5728         int ret;
5729
5730         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5731         if (ret)
5732                 return ret;
5733
5734         if (val != 0 && val != 1)
5735                 return -EINVAL;
5736
5737         mutex_lock(&trace_types_lock);
5738         ret = set_tracer_flag(tr, 1 << index, val);
5739         mutex_unlock(&trace_types_lock);
5740
5741         if (ret < 0)
5742                 return ret;
5743
5744         *ppos += cnt;
5745
5746         return cnt;
5747 }
5748
5749 static const struct file_operations trace_options_core_fops = {
5750         .open = tracing_open_generic,
5751         .read = trace_options_core_read,
5752         .write = trace_options_core_write,
5753         .llseek = generic_file_llseek,
5754 };
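
/*
 * create_trace_options_dir() below creates one 0/1 file under options/
 * for each name in trace_options[], backed by these core fops.  For
 * example (assuming the "overwrite" flag, which maps to
 * TRACE_ITER_OVERWRITE):
 *
 *   # cat /sys/kernel/debug/tracing/options/overwrite
 *   # echo 0 > /sys/kernel/debug/tracing/options/overwrite
 */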
5755
5756 struct dentry *trace_create_file(const char *name,
5757                                  umode_t mode,
5758                                  struct dentry *parent,
5759                                  void *data,
5760                                  const struct file_operations *fops)
5761 {
5762         struct dentry *ret;
5763
5764         ret = debugfs_create_file(name, mode, parent, data, fops);
5765         if (!ret)
5766                 pr_warning("Could not create debugfs '%s' entry\n", name);
5767
5768         return ret;
5769 }
5770
5771
5772 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5773 {
5774         struct dentry *d_tracer;
5775
5776         if (tr->options)
5777                 return tr->options;
5778
5779         d_tracer = tracing_init_dentry_tr(tr);
5780         if (!d_tracer)
5781                 return NULL;
5782
5783         tr->options = debugfs_create_dir("options", d_tracer);
5784         if (!tr->options) {
5785                 pr_warning("Could not create debugfs directory 'options'\n");
5786                 return NULL;
5787         }
5788
5789         return tr->options;
5790 }
5791
5792 static void
5793 create_trace_option_file(struct trace_array *tr,
5794                          struct trace_option_dentry *topt,
5795                          struct tracer_flags *flags,
5796                          struct tracer_opt *opt)
5797 {
5798         struct dentry *t_options;
5799
5800         t_options = trace_options_init_dentry(tr);
5801         if (!t_options)
5802                 return;
5803
5804         topt->flags = flags;
5805         topt->opt = opt;
5806         topt->tr = tr;
5807
5808         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5809                                     &trace_options_fops);
5810
5811 }
5812
5813 static struct trace_option_dentry *
5814 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5815 {
5816         struct trace_option_dentry *topts;
5817         struct tracer_flags *flags;
5818         struct tracer_opt *opts;
5819         int cnt;
5820
5821         if (!tracer)
5822                 return NULL;
5823
5824         flags = tracer->flags;
5825
5826         if (!flags || !flags->opts)
5827                 return NULL;
5828
5829         opts = flags->opts;
5830
5831         for (cnt = 0; opts[cnt].name; cnt++)
5832                 ;
5833
5834         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5835         if (!topts)
5836                 return NULL;
5837
5838         for (cnt = 0; opts[cnt].name; cnt++)
5839                 create_trace_option_file(tr, &topts[cnt], flags,
5840                                          &opts[cnt]);
5841
5842         return topts;
5843 }
5844
5845 static void
5846 destroy_trace_option_files(struct trace_option_dentry *topts)
5847 {
5848         int cnt;
5849
5850         if (!topts)
5851                 return;
5852
5853         for (cnt = 0; topts[cnt].opt; cnt++) {
5854                 if (topts[cnt].entry)
5855                         debugfs_remove(topts[cnt].entry);
5856         }
5857
5858         kfree(topts);
5859 }
5860
5861 static struct dentry *
5862 create_trace_option_core_file(struct trace_array *tr,
5863                               const char *option, long index)
5864 {
5865         struct dentry *t_options;
5866
5867         t_options = trace_options_init_dentry(tr);
5868         if (!t_options)
5869                 return NULL;
5870
5871         return trace_create_file(option, 0644, t_options, (void *)index,
5872                                     &trace_options_core_fops);
5873 }
5874
5875 static __init void create_trace_options_dir(struct trace_array *tr)
5876 {
5877         struct dentry *t_options;
5878         int i;
5879
5880         t_options = trace_options_init_dentry(tr);
5881         if (!t_options)
5882                 return;
5883
5884         for (i = 0; trace_options[i]; i++)
5885                 create_trace_option_core_file(tr, trace_options[i], i);
5886 }
5887
5888 static ssize_t
5889 rb_simple_read(struct file *filp, char __user *ubuf,
5890                size_t cnt, loff_t *ppos)
5891 {
5892         struct trace_array *tr = filp->private_data;
5893         char buf[64];
5894         int r;
5895
5896         r = tracer_tracing_is_on(tr);
5897         r = sprintf(buf, "%d\n", r);
5898
5899         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5900 }
5901
5902 static ssize_t
5903 rb_simple_write(struct file *filp, const char __user *ubuf,
5904                 size_t cnt, loff_t *ppos)
5905 {
5906         struct trace_array *tr = filp->private_data;
5907         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5908         unsigned long val;
5909         int ret;
5910
5911         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5912         if (ret)
5913                 return ret;
5914
5915         if (buffer) {
5916                 mutex_lock(&trace_types_lock);
5917                 if (val) {
5918                         tracer_tracing_on(tr);
5919                         if (tr->current_trace->start)
5920                                 tr->current_trace->start(tr);
5921                 } else {
5922                         tracer_tracing_off(tr);
5923                         if (tr->current_trace->stop)
5924                                 tr->current_trace->stop(tr);
5925                 }
5926                 mutex_unlock(&trace_types_lock);
5927         }
5928
5929         (*ppos)++;
5930
5931         return cnt;
5932 }
5933
5934 static const struct file_operations rb_simple_fops = {
5935         .open           = tracing_open_generic_tr,
5936         .read           = rb_simple_read,
5937         .write          = rb_simple_write,
5938         .release        = tracing_release_generic_tr,
5939         .llseek         = default_llseek,
5940 };
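
/*
 * rb_simple_read()/rb_simple_write() above back the per-instance
 * "tracing_on" file (see init_tracer_debugfs() below).  Sketch, assuming
 * debugfs at /sys/kernel/debug/tracing:
 *
 *   # echo 0 > tracing_on    (stop writing to the ring buffer)
 *   # echo 1 > tracing_on    (resume)
 *   # cat tracing_on         (prints 0 or 1)
 */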
5941
5942 struct dentry *trace_instance_dir;
5943
5944 static void
5945 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5946
5947 static int
5948 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5949 {
5950         enum ring_buffer_flags rb_flags;
5951
5952         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5953
5954         buf->tr = tr;
5955
5956         buf->buffer = ring_buffer_alloc(size, rb_flags);
5957         if (!buf->buffer)
5958                 return -ENOMEM;
5959
5960         buf->data = alloc_percpu(struct trace_array_cpu);
5961         if (!buf->data) {
5962                 ring_buffer_free(buf->buffer);
5963                 return -ENOMEM;
5964         }
5965
5966         /* Allocate the first page for all buffers */
5967         set_buffer_entries(&tr->trace_buffer,
5968                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5969
5970         return 0;
5971 }
5972
5973 static int allocate_trace_buffers(struct trace_array *tr, int size)
5974 {
5975         int ret;
5976
5977         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
5978         if (ret)
5979                 return ret;
5980
5981 #ifdef CONFIG_TRACER_MAX_TRACE
5982         ret = allocate_trace_buffer(tr, &tr->max_buffer,
5983                                     allocate_snapshot ? size : 1);
5984         if (WARN_ON(ret)) {
5985                 ring_buffer_free(tr->trace_buffer.buffer);
5986                 free_percpu(tr->trace_buffer.data);
5987                 return -ENOMEM;
5988         }
5989         tr->allocated_snapshot = allocate_snapshot;
5990
5991         /*
5992          * Only the top level trace array gets its snapshot allocated
5993          * from the kernel command line.
5994          */
5995         allocate_snapshot = false;
5996 #endif
5997         return 0;
5998 }
5999
6000 static int new_instance_create(const char *name)
6001 {
6002         struct trace_array *tr;
6003         int ret;
6004
6005         mutex_lock(&trace_types_lock);
6006
6007         ret = -EEXIST;
6008         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6009                 if (tr->name && strcmp(tr->name, name) == 0)
6010                         goto out_unlock;
6011         }
6012
6013         ret = -ENOMEM;
6014         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6015         if (!tr)
6016                 goto out_unlock;
6017
6018         tr->name = kstrdup(name, GFP_KERNEL);
6019         if (!tr->name)
6020                 goto out_free_tr;
6021
6022         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6023                 goto out_free_tr;
6024
6025         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6026
6027         raw_spin_lock_init(&tr->start_lock);
6028
6029         tr->current_trace = &nop_trace;
6030
6031         INIT_LIST_HEAD(&tr->systems);
6032         INIT_LIST_HEAD(&tr->events);
6033
6034         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6035                 goto out_free_tr;
6036
6037         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6038         if (!tr->dir)
6039                 goto out_free_tr;
6040
6041         ret = event_trace_add_tracer(tr->dir, tr);
6042         if (ret) {
6043                 debugfs_remove_recursive(tr->dir);
6044                 goto out_free_tr;
6045         }
6046
6047         init_tracer_debugfs(tr, tr->dir);
6048
6049         list_add(&tr->list, &ftrace_trace_arrays);
6050
6051         mutex_unlock(&trace_types_lock);
6052
6053         return 0;
6054
6055  out_free_tr:
6056         if (tr->trace_buffer.buffer)
6057                 ring_buffer_free(tr->trace_buffer.buffer);
6058         free_cpumask_var(tr->tracing_cpumask);
6059         kfree(tr->name);
6060         kfree(tr);
6061
6062  out_unlock:
6063         mutex_unlock(&trace_types_lock);
6064
6065         return ret;
6066
6067 }
6068
6069 static int instance_delete(const char *name)
6070 {
6071         struct trace_array *tr;
6072         int found = 0;
6073         int ret;
6074
6075         mutex_lock(&trace_types_lock);
6076
6077         ret = -ENODEV;
6078         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6079                 if (tr->name && strcmp(tr->name, name) == 0) {
6080                         found = 1;
6081                         break;
6082                 }
6083         }
6084         if (!found)
6085                 goto out_unlock;
6086
6087         ret = -EBUSY;
6088         if (tr->ref)
6089                 goto out_unlock;
6090
6091         list_del(&tr->list);
6092
6093         event_trace_del_tracer(tr);
6094         debugfs_remove_recursive(tr->dir);
6095         free_percpu(tr->trace_buffer.data);
6096         ring_buffer_free(tr->trace_buffer.buffer);
6097
6098         kfree(tr->name);
6099         kfree(tr);
6100
6101         ret = 0;
6102
6103  out_unlock:
6104         mutex_unlock(&trace_types_lock);
6105
6106         return ret;
6107 }
6108
6109 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6110 {
6111         struct dentry *parent;
6112         int ret;
6113
6114         /* Paranoid: Make sure the parent is the "instances" directory */
6115         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6116         if (WARN_ON_ONCE(parent != trace_instance_dir))
6117                 return -ENOENT;
6118
6119         /*
6120          * The inode mutex is locked, but debugfs_create_dir() will also
6121          * take the mutex. As the instances directory can not be destroyed
6122          * or changed in any other way, it is safe to unlock it, and
6123          * let the dentry try. If two users try to make the same dir at
6124          * the same time, then the new_instance_create() will determine the
6125          * winner.
6126          */
6127         mutex_unlock(&inode->i_mutex);
6128
6129         ret = new_instance_create(dentry->d_iname);
6130
6131         mutex_lock(&inode->i_mutex);
6132
6133         return ret;
6134 }
6135
6136 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6137 {
6138         struct dentry *parent;
6139         int ret;
6140
6141         /* Paranoid: Make sure the parent is the "instances" directory */
6142         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6143         if (WARN_ON_ONCE(parent != trace_instance_dir))
6144                 return -ENOENT;
6145
6146         /* The caller did a dget() on dentry */
6147         mutex_unlock(&dentry->d_inode->i_mutex);
6148
6149         /*
6150          * The inode mutex is locked, but debugfs_remove_recursive() will also
6151          * take the mutex. As the instances directory can not be destroyed
6152          * or changed in any other way, it is safe to unlock it, and
6153          * let the dentry try. If two users try to remove the same dir at
6154          * the same time, then instance_delete() will determine the
6155          * winner.
6156          */
6157         mutex_unlock(&inode->i_mutex);
6158
6159         ret = instance_delete(dentry->d_iname);
6160
6161         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6162         mutex_lock(&dentry->d_inode->i_mutex);
6163
6164         return ret;
6165 }
6166
6167 static const struct inode_operations instance_dir_inode_operations = {
6168         .lookup         = simple_lookup,
6169         .mkdir          = instance_mkdir,
6170         .rmdir          = instance_rmdir,
6171 };
6172
6173 static __init void create_trace_instances(struct dentry *d_tracer)
6174 {
6175         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6176         if (WARN_ON(!trace_instance_dir))
6177                 return;
6178
6179         /* Hijack the dir inode operations, to allow mkdir */
6180         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6181 }
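
/*
 * With the mkdir/rmdir hooks above in place, trace instances can be
 * created and removed from userspace.  Sketch, assuming debugfs at
 * /sys/kernel/debug/tracing:
 *
 *   # mkdir instances/foo    (new trace array with its own trace, trace_pipe, ...)
 *   # rmdir instances/foo    (fails with -EBUSY while the instance is in use)
 */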
6182
6183 static void
6184 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6185 {
6186         int cpu;
6187
6188         trace_create_file("tracing_cpumask", 0644, d_tracer,
6189                           tr, &tracing_cpumask_fops);
6190
6191         trace_create_file("trace_options", 0644, d_tracer,
6192                           tr, &tracing_iter_fops);
6193
6194         trace_create_file("trace", 0644, d_tracer,
6195                           tr, &tracing_fops);
6196
6197         trace_create_file("trace_pipe", 0444, d_tracer,
6198                           tr, &tracing_pipe_fops);
6199
6200         trace_create_file("buffer_size_kb", 0644, d_tracer,
6201                           tr, &tracing_entries_fops);
6202
6203         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6204                           tr, &tracing_total_entries_fops);
6205
6206         trace_create_file("free_buffer", 0200, d_tracer,
6207                           tr, &tracing_free_buffer_fops);
6208
6209         trace_create_file("trace_marker", 0220, d_tracer,
6210                           tr, &tracing_mark_fops);
6211
6212         trace_create_file("trace_clock", 0644, d_tracer, tr,
6213                           &trace_clock_fops);
6214
6215         trace_create_file("tracing_on", 0644, d_tracer,
6216                           tr, &rb_simple_fops);
6217
6218 #ifdef CONFIG_TRACER_SNAPSHOT
6219         trace_create_file("snapshot", 0644, d_tracer,
6220                           tr, &snapshot_fops);
6221 #endif
6222
6223         for_each_tracing_cpu(cpu)
6224                 tracing_init_debugfs_percpu(tr, cpu);
6225
6226 }
6227
6228 static __init int tracer_init_debugfs(void)
6229 {
6230         struct dentry *d_tracer;
6231
6232         trace_access_lock_init();
6233
6234         d_tracer = tracing_init_dentry();
6235         if (!d_tracer)
6236                 return 0;
6237
6238         init_tracer_debugfs(&global_trace, d_tracer);
6239
6240         trace_create_file("available_tracers", 0444, d_tracer,
6241                         &global_trace, &show_traces_fops);
6242
6243         trace_create_file("current_tracer", 0644, d_tracer,
6244                         &global_trace, &set_tracer_fops);
6245
6246 #ifdef CONFIG_TRACER_MAX_TRACE
6247         trace_create_file("tracing_max_latency", 0644, d_tracer,
6248                         &tracing_max_latency, &tracing_max_lat_fops);
6249 #endif
6250
6251         trace_create_file("tracing_thresh", 0644, d_tracer,
6252                         &tracing_thresh, &tracing_max_lat_fops);
6253
6254         trace_create_file("README", 0444, d_tracer,
6255                         NULL, &tracing_readme_fops);
6256
6257         trace_create_file("saved_cmdlines", 0444, d_tracer,
6258                         NULL, &tracing_saved_cmdlines_fops);
6259
6260 #ifdef CONFIG_DYNAMIC_FTRACE
6261         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6262                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6263 #endif
6264
6265         create_trace_instances(d_tracer);
6266
6267         create_trace_options_dir(&global_trace);
6268
6269         return 0;
6270 }
6271
6272 static int trace_panic_handler(struct notifier_block *this,
6273                                unsigned long event, void *unused)
6274 {
6275         if (ftrace_dump_on_oops)
6276                 ftrace_dump(ftrace_dump_on_oops);
6277         return NOTIFY_OK;
6278 }
6279
6280 static struct notifier_block trace_panic_notifier = {
6281         .notifier_call  = trace_panic_handler,
6282         .next           = NULL,
6283         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6284 };
6285
6286 static int trace_die_handler(struct notifier_block *self,
6287                              unsigned long val,
6288                              void *data)
6289 {
6290         switch (val) {
6291         case DIE_OOPS:
6292                 if (ftrace_dump_on_oops)
6293                         ftrace_dump(ftrace_dump_on_oops);
6294                 break;
6295         default:
6296                 break;
6297         }
6298         return NOTIFY_OK;
6299 }
6300
6301 static struct notifier_block trace_die_notifier = {
6302         .notifier_call = trace_die_handler,
6303         .priority = 200
6304 };
6305
6306 /*
6307  * printk() is capped at 1024 characters; we really don't need it that big.
6308  * Nothing should be printing 1000 characters anyway.
6309  */
6310 #define TRACE_MAX_PRINT         1000
6311
6312 /*
6313  * Define here KERN_TRACE so that we have one place to modify
6314  * it if we decide to change what log level the ftrace dump
6315  * should be at.
6316  */
6317 #define KERN_TRACE              KERN_EMERG
6318
6319 void
6320 trace_printk_seq(struct trace_seq *s)
6321 {
6322         /* Probably should print a warning here. */
6323         if (s->len >= TRACE_MAX_PRINT)
6324                 s->len = TRACE_MAX_PRINT;
6325
6326         /* Should be zero-terminated, but we are paranoid. */
6327         s->buffer[s->len] = 0;
6328
6329         printk(KERN_TRACE "%s", s->buffer);
6330
6331         trace_seq_init(s);
6332 }
6333
6334 void trace_init_global_iter(struct trace_iterator *iter)
6335 {
6336         iter->tr = &global_trace;
6337         iter->trace = iter->tr->current_trace;
6338         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6339         iter->trace_buffer = &global_trace.trace_buffer;
6340
6341         if (iter->trace && iter->trace->open)
6342                 iter->trace->open(iter);
6343
6344         /* Annotate start of buffers if we had overruns */
6345         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6346                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6347
6348         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6349         if (trace_clocks[iter->tr->clock_id].in_ns)
6350                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6351 }
6352
6353 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6354 {
6355         /* use static because iter can be a bit big for the stack */
6356         static struct trace_iterator iter;
6357         static atomic_t dump_running;
6358         unsigned int old_userobj;
6359         unsigned long flags;
6360         int cnt = 0, cpu;
6361
6362         /* Only allow one dump user at a time. */
6363         if (atomic_inc_return(&dump_running) != 1) {
6364                 atomic_dec(&dump_running);
6365                 return;
6366         }
6367
6368         /*
6369          * Always turn off tracing when we dump.
6370          * We don't need to show trace output of what happens
6371          * between multiple crashes.
6372          *
6373          * If the user does a sysrq-z, then they can re-enable
6374          * tracing with echo 1 > tracing_on.
6375          */
6376         tracing_off();
6377
6378         local_irq_save(flags);
6379
6380         /* Simulate the iterator */
6381         trace_init_global_iter(&iter);
6382
6383         for_each_tracing_cpu(cpu) {
6384                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6385         }
6386
6387         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6388
6389         /* don't look at user memory in panic mode */
6390         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6391
6392         switch (oops_dump_mode) {
6393         case DUMP_ALL:
6394                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6395                 break;
6396         case DUMP_ORIG:
6397                 iter.cpu_file = raw_smp_processor_id();
6398                 break;
6399         case DUMP_NONE:
6400                 goto out_enable;
6401         default:
6402                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6403                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6404         }
6405
6406         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6407
6408         /* Did function tracer already get disabled? */
6409         if (ftrace_is_dead()) {
6410                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6411                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6412         }
6413
6414         /*
6415          * We need to stop all tracing on all CPUs to read
6416          * the next buffer. This is a bit expensive, but it is
6417          * not done often. We read everything we can,
6418          * and then release the locks again.
6419          */
6420
6421         while (!trace_empty(&iter)) {
6422
6423                 if (!cnt)
6424                         printk(KERN_TRACE "---------------------------------\n");
6425
6426                 cnt++;
6427
6428                 /* reset all but tr, trace, and overruns */
6429                 memset(&iter.seq, 0,
6430                        sizeof(struct trace_iterator) -
6431                        offsetof(struct trace_iterator, seq));
6432                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6433                 iter.pos = -1;
6434
6435                 if (trace_find_next_entry_inc(&iter) != NULL) {
6436                         int ret;
6437
6438                         ret = print_trace_line(&iter);
6439                         if (ret != TRACE_TYPE_NO_CONSUME)
6440                                 trace_consume(&iter);
6441                 }
6442                 touch_nmi_watchdog();
6443
6444                 trace_printk_seq(&iter.seq);
6445         }
6446
6447         if (!cnt)
6448                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6449         else
6450                 printk(KERN_TRACE "---------------------------------\n");
6451
6452  out_enable:
6453         trace_flags |= old_userobj;
6454
6455         for_each_tracing_cpu(cpu) {
6456                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6457         }
6458         atomic_dec(&dump_running);
6459         local_irq_restore(flags);
6460 }
6461 EXPORT_SYMBOL_GPL(ftrace_dump);
6462
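/*
 * Illustrative sketch, not part of the original file: ftrace_dump() is
 * exported, so a module's fatal-error path could dump the buffers to the
 * console.  The helper name below is hypothetical; note that ftrace_dump()
 * also turns tracing off (see tracing_off() above).
 */
static void __maybe_unused example_dump_on_fatal_error(void)
{
        /* Print every CPU's buffer, as the oops/panic notifiers would */
        ftrace_dump(DUMP_ALL);
}
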
6463 __init static int tracer_alloc_buffers(void)
6464 {
6465         int ring_buf_size;
6466         int ret = -ENOMEM;
6467
6468
6469         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6470                 goto out;
6471
6472         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6473                 goto out_free_buffer_mask;
6474
6475         /* Only allocate trace_printk buffers if a trace_printk exists */
6476         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6477                 /* Must be called before global_trace.buffer is allocated */
6478                 trace_printk_init_buffers();
6479
6480         /* To save memory, keep the ring buffer at its minimum size */
6481         if (ring_buffer_expanded)
6482                 ring_buf_size = trace_buf_size;
6483         else
6484                 ring_buf_size = 1;
6485
6486         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6487         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6488
6489         raw_spin_lock_init(&global_trace.start_lock);
6490
6491         /* TODO: make the number of buffers hot pluggable with CPUs */
6492         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6493                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6494                 WARN_ON(1);
6495                 goto out_free_cpumask;
6496         }
6497
6498         if (global_trace.buffer_disabled)
6499                 tracing_off();
6500
6501         trace_init_cmdlines();
6502
6503         /*
6504          * register_tracer() might reference current_trace, so it
6505          * needs to be set before we register anything. This is
6506          * just a bootstrap of current_trace anyway.
6507          */
6508         global_trace.current_trace = &nop_trace;
6509
6510         register_tracer(&nop_trace);
6511
6512         /* All seems OK, enable tracing */
6513         tracing_disabled = 0;
6514
6515         atomic_notifier_chain_register(&panic_notifier_list,
6516                                        &trace_panic_notifier);
6517
6518         register_die_notifier(&trace_die_notifier);
6519
6520         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6521
6522         INIT_LIST_HEAD(&global_trace.systems);
6523         INIT_LIST_HEAD(&global_trace.events);
6524         list_add(&global_trace.list, &ftrace_trace_arrays);
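        /*
         * global_trace is now the first entry on ftrace_trace_arrays, the
         * list that later per-instance trace arrays are also added to.
         */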
6525
6526         while (trace_boot_options) {
6527                 char *option;
6528
6529                 option = strsep(&trace_boot_options, ",");
6530                 trace_set_options(&global_trace, option);
6531         }
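
        /*
         * Example (hedged): booting with "trace_options=stacktrace,sym-offset"
         * is split by the strsep() loop above into "stacktrace" and
         * "sym-offset", each applied via trace_set_options().
         */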
6532
6533         register_snapshot_cmd();
6534
6535         return 0;
6536
6537 out_free_cpumask:
6538         free_percpu(global_trace.trace_buffer.data);
6539 #ifdef CONFIG_TRACER_MAX_TRACE
6540         free_percpu(global_trace.max_buffer.data);
6541 #endif
6542         free_cpumask_var(global_trace.tracing_cpumask);
6543 out_free_buffer_mask:
6544         free_cpumask_var(tracing_buffer_mask);
6545 out:
6546         return ret;
6547 }
6548
6549 __init static int clear_boot_tracer(void)
6550 {
6551         /*
6552          * The buffer holding the default bootup tracer name lives in
6553          * an init section. This function runs as a late_initcall; if
6554          * the boot tracer was never registered, clear the pointer so
6555          * that a later registration does not access a buffer that is
6556          * about to be freed.
6557          */
6558         if (!default_bootup_tracer)
6559                 return 0;
6560
6561         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6562                default_bootup_tracer);
6563         default_bootup_tracer = NULL;
6564
6565         return 0;
6566 }
6567
6568 early_initcall(tracer_alloc_buffers);
6569 fs_initcall(tracer_init_debugfs);
6570 late_initcall(clear_boot_tracer);
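
/*
 * Initcall ordering: the ring buffers are set up at early_initcall time,
 * the debugfs files at fs_initcall time, and the stale pointer to the
 * init-section bootup tracer name is cleared at late_initcall time, which
 * still runs before init memory is freed.
 */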