1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will peek into the ring buffer to count the
55  * entries inserted during the selftest, although some concurrent
56  * insertions into the ring buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 and is set back to zero only if the
91  * initialization of the tracer is successful; that is the only
92  * place that clears it.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs, or
113  * to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
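/*
 * Illustrative usage sketch (based on the description above, not a
 * definitive recipe): dumping can be requested either at boot or at
 * run time.
 *
 *   # on the kernel command line:
 *   ftrace_dump_on_oops            (dump the buffers of all CPUs)
 *   ftrace_dump_on_oops=orig_cpu   (dump only the CPU that oopsed)
 *
 *   # at run time via the sysctl:
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *   echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 */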
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
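/*
 * Worked example of the rounding above: adding 500 before the divide
 * rounds to the nearest microsecond instead of truncating, e.g.
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */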
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptors of those pages are used to hold the
196  * linked list, by linking the lru item in each page descriptor
197  * to the other pages of that CPU's buffer.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
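/*
 * Illustrative pairing sketch (hypothetical caller, not kernel API
 * documentation): a successful trace_array_get() must be balanced by a
 * trace_array_put() once the caller is done with the trace_array.
 *
 *   if (trace_array_get(tr) < 0)
 *           return -ENODEV;        /* tr is not on ftrace_trace_arrays */
 *   ...                            /* safe to use tr here */
 *   trace_array_put(tr);           /* drop the reference again */
 */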
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show whether global_trace is enabled
287  *
288  * Shows whether the global trace is enabled or not. It uses the
289  * mirror flag "buffer_disabled" so it can be used in fast paths such
290  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on() which is a little
292  * slower, but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to the low value of 16384.
311  * If a dump on oops happens, it is much appreciated not to
312  * have to wait for all that output. In any case, this is
313  * configurable at both boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a link list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * serialize the access of the ring buffer
329  *
330  * The ring buffer serializes readers, but that is only low-level protection.
331  * The validity of the events (as returned by ring_buffer_peek() etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow other processes
335  * to consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the event producer.
339  *   B) the page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-cpu
343  * ring buffers concurrently.
344  *
345  * These primitives don't distinguish read-only and read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
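/*
 * Illustrative reader-side sketch (hypothetical, mirroring how the
 * readers in this file use these helpers): take the per-cpu access
 * lock around any consuming read of a cpu buffer, or pass
 * RING_BUFFER_ALL_CPUS to lock out all per-cpu readers at once.
 *
 *   trace_access_lock(cpu);
 *   ... consume events from the @cpu ring buffer ...
 *   trace_access_unlock(cpu);
 */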
408
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff), that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races of where it gets disabled but we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457
458         if (unlikely(tracing_selftest_running || tracing_disabled))
459                 return 0;
460
461         alloc = sizeof(*entry) + size + 2; /* possible \n added */
462
463         local_save_flags(irq_flags);
464         buffer = global_trace.trace_buffer.buffer;
465         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
466                                           irq_flags, preempt_count());
467         if (!event)
468                 return 0;
469
470         entry = ring_buffer_event_data(event);
471         entry->ip = ip;
472
473         memcpy(&entry->buf, str, size);
474
475         /* Add a newline if necessary */
476         if (entry->buf[size - 1] != '\n') {
477                 entry->buf[size] = '\n';
478                 entry->buf[size + 1] = '\0';
479         } else
480                 entry->buf[size] = '\0';
481
482         __buffer_unlock_commit(buffer, event);
483
484         return size;
485 }
486 EXPORT_SYMBOL_GPL(__trace_puts);
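/*
 * Illustrative call sketch (hypothetical caller): writing a string
 * literal into the ring buffer from C code, with _THIS_IP_ recording
 * the caller's address as @ip.
 *
 *   __trace_puts(_THIS_IP_, "hello", 5);
 */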
487
488 /**
489  * __trace_bputs - write the pointer to a constant string into trace buffer
490  * @ip:    The address of the caller
491  * @str:   The constant string to write to the buffer
492  */
493 int __trace_bputs(unsigned long ip, const char *str)
494 {
495         struct ring_buffer_event *event;
496         struct ring_buffer *buffer;
497         struct bputs_entry *entry;
498         unsigned long irq_flags;
499         int size = sizeof(struct bputs_entry);
500
501         if (unlikely(tracing_selftest_running || tracing_disabled))
502                 return 0;
503
504         local_save_flags(irq_flags);
505         buffer = global_trace.trace_buffer.buffer;
506         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
507                                           irq_flags, preempt_count());
508         if (!event)
509                 return 0;
510
511         entry = ring_buffer_event_data(event);
512         entry->ip                       = ip;
513         entry->str                      = str;
514
515         __buffer_unlock_commit(buffer, event);
516
517         return 1;
518 }
519 EXPORT_SYMBOL_GPL(__trace_bputs);
520
521 #ifdef CONFIG_TRACER_SNAPSHOT
522 /**
523  * tracing_snapshot - take a snapshot of the current buffer.
524  *
525  * This causes a swap between the snapshot buffer and the current live
526  * tracing buffer. You can use this to take snapshots of the live
527  * trace when some condition is triggered, but continue to trace.
528  *
529  * Note, make sure to allocate the snapshot either with
530  * tracing_snapshot_alloc(), or manually with:
531  *   echo 1 > /sys/kernel/debug/tracing/snapshot
532  *
533  * If the snapshot buffer is not allocated, this will stop tracing,
534  * basically making a permanent snapshot.
535  */
536 void tracing_snapshot(void)
537 {
538         struct trace_array *tr = &global_trace;
539         struct tracer *tracer = tr->current_trace;
540         unsigned long flags;
541
542         if (in_nmi()) {
543                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
544                 internal_trace_puts("*** snapshot is being ignored        ***\n");
545                 return;
546         }
547
548         if (!tr->allocated_snapshot) {
549                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
550                 internal_trace_puts("*** stopping trace here!   ***\n");
551                 tracing_off();
552                 return;
553         }
554
555         /* Note, snapshot can not be used when the tracer uses it */
556         if (tracer->use_max_tr) {
557                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
558                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
559                 return;
560         }
561
562         local_irq_save(flags);
563         update_max_tr(tr, current, smp_processor_id());
564         local_irq_restore(flags);
565 }
566 EXPORT_SYMBOL_GPL(tracing_snapshot);
567
568 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
569                                         struct trace_buffer *size_buf, int cpu_id);
570 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
571
572 static int alloc_snapshot(struct trace_array *tr)
573 {
574         int ret;
575
576         if (!tr->allocated_snapshot) {
577
578                 /* allocate spare buffer */
579                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
580                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
581                 if (ret < 0)
582                         return ret;
583
584                 tr->allocated_snapshot = true;
585         }
586
587         return 0;
588 }
589
590 void free_snapshot(struct trace_array *tr)
591 {
592         /*
593          * We don't free the ring buffer. instead, resize it because
594          * The max_tr ring buffer has some state (e.g. ring->clock) and
595          * we want preserve it.
596          */
597         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
598         set_buffer_entries(&tr->max_buffer, 1);
599         tracing_reset_online_cpus(&tr->max_buffer);
600         tr->allocated_snapshot = false;
601 }
602
603 /**
604  * tracing_alloc_snapshot - allocate snapshot buffer.
605  *
606  * This only allocates the snapshot buffer if it isn't already
607  * allocated - it doesn't also take a snapshot.
608  *
609  * This is meant to be used in cases where the snapshot buffer needs
610  * to be set up for events that can't sleep but need to be able to
611  * trigger a snapshot.
612  */
613 int tracing_alloc_snapshot(void)
614 {
615         struct trace_array *tr = &global_trace;
616         int ret;
617
618         ret = alloc_snapshot(tr);
619         WARN_ON(ret < 0);
620
621         return ret;
622 }
623 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
624
625 /**
626  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
627  *
628  * This is similar to tracing_snapshot(), but it will allocate the
629  * snapshot buffer if it isn't already allocated. Use this only
630  * where it is safe to sleep, as the allocation may sleep.
631  *
632  * This causes a swap between the snapshot buffer and the current live
633  * tracing buffer. You can use this to take snapshots of the live
634  * trace when some condition is triggered, but continue to trace.
635  */
636 void tracing_snapshot_alloc(void)
637 {
638         int ret;
639
640         ret = tracing_alloc_snapshot();
641         if (ret < 0)
642                 return;
643
644         tracing_snapshot();
645 }
646 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
647 #else
648 void tracing_snapshot(void)
649 {
650         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
651 }
652 EXPORT_SYMBOL_GPL(tracing_snapshot);
653 int tracing_alloc_snapshot(void)
654 {
655         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
656         return -ENODEV;
657 }
658 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
659 void tracing_snapshot_alloc(void)
660 {
661         /* Give warning */
662         tracing_snapshot();
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
665 #endif /* CONFIG_TRACER_SNAPSHOT */
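/*
 * Illustrative snapshot usage sketch (hypothetical module code, based
 * on the kernel-doc above): allocate the snapshot buffer in a context
 * that may sleep, then trigger snapshots later from (almost) anywhere
 * except NMI context.
 *
 *   static int __init my_init(void)
 *   {
 *           return tracing_alloc_snapshot();   /* may sleep */
 *   }
 *
 *   static void my_condition_hit(void)
 *   {
 *           tracing_snapshot();   /* swap live buffer with snapshot */
 *   }
 */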
666
667 static void tracer_tracing_off(struct trace_array *tr)
668 {
669         if (tr->trace_buffer.buffer)
670                 ring_buffer_record_off(tr->trace_buffer.buffer);
671         /*
672          * This flag is looked at when buffers haven't been allocated
673          * yet, or by some tracers (like irqsoff), that just want to
674          * know if the ring buffer has been disabled, but it can handle
675          * races of where it gets disabled but we still do a record.
676          * As the check is in the fast path of the tracers, it is more
677          * important to be fast than accurate.
678          */
679         tr->buffer_disabled = 1;
680         /* Make the flag seen by readers */
681         smp_wmb();
682 }
683
684 /**
685  * tracing_off - turn off tracing buffers
686  *
687  * This function stops the tracing buffers from recording data.
688  * It does not disable any overhead the tracers themselves may
689  * be causing. This function simply causes all recording to
690  * the ring buffers to fail.
691  */
692 void tracing_off(void)
693 {
694         tracer_tracing_off(&global_trace);
695 }
696 EXPORT_SYMBOL_GPL(tracing_off);
697
698 void disable_trace_on_warning(void)
699 {
700         if (__disable_trace_on_warning)
701                 tracing_off();
702 }
703
704 /**
705  * tracer_tracing_is_on - show real state of ring buffer enabled
706  * @tr: the trace array whose ring buffer state to check
707  *
708  * Shows the real state of the ring buffer: whether it is enabled or not.
709  */
710 static int tracer_tracing_is_on(struct trace_array *tr)
711 {
712         if (tr->trace_buffer.buffer)
713                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
714         return !tr->buffer_disabled;
715 }
716
717 /**
718  * tracing_is_on - show state of ring buffers enabled
719  */
720 int tracing_is_on(void)
721 {
722         return tracer_tracing_is_on(&global_trace);
723 }
724 EXPORT_SYMBOL_GPL(tracing_is_on);
725
726 static int __init set_buf_size(char *str)
727 {
728         unsigned long buf_size;
729
730         if (!str)
731                 return 0;
732         buf_size = memparse(str, &str);
733         /* nr_entries can not be zero */
734         if (buf_size == 0)
735                 return 0;
736         trace_buf_size = buf_size;
737         return 1;
738 }
739 __setup("trace_buf_size=", set_buf_size);
740
741 static int __init set_tracing_thresh(char *str)
742 {
743         unsigned long threshold;
744         int ret;
745
746         if (!str)
747                 return 0;
748         ret = kstrtoul(str, 0, &threshold);
749         if (ret < 0)
750                 return 0;
751         tracing_thresh = threshold * 1000;
752         return 1;
753 }
754 __setup("tracing_thresh=", set_tracing_thresh);
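/*
 * Illustrative boot-parameter sketch (hypothetical command line):
 * memparse() accepts the usual K/M/G suffixes for trace_buf_size, and
 * tracing_thresh is given in microseconds (the handler above stores it
 * multiplied by 1000).
 *
 *   trace_buf_size=1M tracing_thresh=100
 */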
755
756 unsigned long nsecs_to_usecs(unsigned long nsecs)
757 {
758         return nsecs / 1000;
759 }
760
761 /* These must match the bit positions in trace_iterator_flags */
762 static const char *trace_options[] = {
763         "print-parent",
764         "sym-offset",
765         "sym-addr",
766         "verbose",
767         "raw",
768         "hex",
769         "bin",
770         "block",
771         "stacktrace",
772         "trace_printk",
773         "ftrace_preempt",
774         "branch",
775         "annotate",
776         "userstacktrace",
777         "sym-userobj",
778         "printk-msg-only",
779         "context-info",
780         "latency-format",
781         "sleep-time",
782         "graph-time",
783         "record-cmd",
784         "overwrite",
785         "disable_on_free",
786         "irq-info",
787         "markers",
788         "function-trace",
789         NULL
790 };
791
792 static struct {
793         u64 (*func)(void);
794         const char *name;
795         int in_ns;              /* is this clock in nanoseconds? */
796 } trace_clocks[] = {
797         { trace_clock_local,    "local",        1 },
798         { trace_clock_global,   "global",       1 },
799         { trace_clock_counter,  "counter",      0 },
800         { trace_clock_jiffies,  "uptime",       1 },
801         { trace_clock,          "perf",         1 },
802         ARCH_TRACE_CLOCKS
803 };
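/*
 * Illustrative sketch (hypothetical shell usage): the names above are
 * the values accepted by the tracing "trace_clock" file, e.g.
 *
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 */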
804
805 /*
806  * trace_parser_get_init - gets the buffer for trace parser
807  */
808 int trace_parser_get_init(struct trace_parser *parser, int size)
809 {
810         memset(parser, 0, sizeof(*parser));
811
812         parser->buffer = kmalloc(size, GFP_KERNEL);
813         if (!parser->buffer)
814                 return 1;
815
816         parser->size = size;
817         return 0;
818 }
819
820 /*
821  * trace_parser_put - frees the buffer for trace parser
822  */
823 void trace_parser_put(struct trace_parser *parser)
824 {
825         kfree(parser->buffer);
826 }
827
828 /*
829  * trace_get_user - reads the user input string separated by space
830  * (matched by isspace(ch))
831  *
832  * For each string found the 'struct trace_parser' is updated,
833  * and the function returns.
834  *
835  * Returns number of bytes read.
836  *
837  * See kernel/trace/trace.h for 'struct trace_parser' details.
838  */
839 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
840         size_t cnt, loff_t *ppos)
841 {
842         char ch;
843         size_t read = 0;
844         ssize_t ret;
845
846         if (!*ppos)
847                 trace_parser_clear(parser);
848
849         ret = get_user(ch, ubuf++);
850         if (ret)
851                 goto out;
852
853         read++;
854         cnt--;
855
856         /*
857          * If the parser is not finished with the last write,
858          * continue reading the user input without skipping spaces.
859          */
860         if (!parser->cont) {
861                 /* skip white space */
862                 while (cnt && isspace(ch)) {
863                         ret = get_user(ch, ubuf++);
864                         if (ret)
865                                 goto out;
866                         read++;
867                         cnt--;
868                 }
869
870                 /* only spaces were written */
871                 if (isspace(ch)) {
872                         *ppos += read;
873                         ret = read;
874                         goto out;
875                 }
876
877                 parser->idx = 0;
878         }
879
880         /* read the non-space input */
881         while (cnt && !isspace(ch)) {
882                 if (parser->idx < parser->size - 1)
883                         parser->buffer[parser->idx++] = ch;
884                 else {
885                         ret = -EINVAL;
886                         goto out;
887                 }
888                 ret = get_user(ch, ubuf++);
889                 if (ret)
890                         goto out;
891                 read++;
892                 cnt--;
893         }
894
895         /* We either got finished input or we have to wait for another call. */
896         if (isspace(ch)) {
897                 parser->buffer[parser->idx] = 0;
898                 parser->cont = false;
899         } else if (parser->idx < parser->size - 1) {
900                 parser->cont = true;
901                 parser->buffer[parser->idx++] = ch;
902         } else {
903                 ret = -EINVAL;
904                 goto out;
905         }
906
907         *ppos += read;
908         ret = read;
909
910 out:
911         return ret;
912 }
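/*
 * Illustrative parser lifecycle sketch (hypothetical write() handler):
 * the usual pattern in this file is init once, feed user data through
 * trace_get_user() until a token is loaded, then free the buffer.
 *
 *   struct trace_parser parser;
 *
 *   if (trace_parser_get_init(&parser, PAGE_SIZE))
 *           return -ENOMEM;
 *   ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *   if (ret >= 0 && trace_parser_loaded(&parser))
 *           ...;                  /* parser.buffer holds one token */
 *   trace_parser_put(&parser);
 */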
913
914 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
915 {
916         int len;
917         int ret;
918
919         if (!cnt)
920                 return 0;
921
922         if (s->len <= s->readpos)
923                 return -EBUSY;
924
925         len = s->len - s->readpos;
926         if (cnt > len)
927                 cnt = len;
928         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
929         if (ret == cnt)
930                 return -EFAULT;
931
932         cnt -= ret;
933
934         s->readpos += cnt;
935         return cnt;
936 }
937
938 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
939 {
940         int len;
941
942         if (s->len <= s->readpos)
943                 return -EBUSY;
944
945         len = s->len - s->readpos;
946         if (cnt > len)
947                 cnt = len;
948         memcpy(buf, s->buffer + s->readpos, cnt);
949
950         s->readpos += cnt;
951         return cnt;
952 }
953
954 /*
955  * ftrace_max_lock is used to protect the swapping of buffers
956  * when taking a max snapshot. The buffers themselves are
957  * protected by per_cpu spinlocks. But the action of the swap
958  * needs its own lock.
959  *
960  * This is defined as an arch_spinlock_t in order to help
961  * with performance when lockdep debugging is enabled.
962  *
963  * It is also used in places other than update_max_tr, so it
964  * needs to be defined outside of
965  * CONFIG_TRACER_MAX_TRACE.
966  */
967 static arch_spinlock_t ftrace_max_lock =
968         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
969
970 unsigned long __read_mostly     tracing_thresh;
971
972 #ifdef CONFIG_TRACER_MAX_TRACE
973 unsigned long __read_mostly     tracing_max_latency;
974
975 /*
976  * Copy the new maximum trace into the separate maximum-trace
977  * structure. (this way the maximum trace is permanently saved,
978  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979  */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983         struct trace_buffer *trace_buf = &tr->trace_buffer;
984         struct trace_buffer *max_buf = &tr->max_buffer;
985         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988         max_buf->cpu = cpu;
989         max_buf->time_start = data->preempt_timestamp;
990
991         max_data->saved_latency = tracing_max_latency;
992         max_data->critical_start = data->critical_start;
993         max_data->critical_end = data->critical_end;
994
995         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996         max_data->pid = tsk->pid;
997         /*
998          * If tsk == current, then use current_uid(), as that does not use
999          * RCU. The irq tracer can be called out of RCU scope.
1000          */
1001         if (tsk == current)
1002                 max_data->uid = current_uid();
1003         else
1004                 max_data->uid = task_uid(tsk);
1005
1006         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007         max_data->policy = tsk->policy;
1008         max_data->rt_priority = tsk->rt_priority;
1009
1010         /* record this task's comm */
1011         tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016  * @tr: tracer
1017  * @tsk: the task with the latency
1018  * @cpu: The cpu that initiated the trace.
1019  *
1020  * Flip the buffers between the @tr and the max_tr and record information
1021  * about which task was the cause of this latency.
1022  */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026         struct ring_buffer *buf;
1027
1028         if (tr->stop_count)
1029                 return;
1030
1031         WARN_ON_ONCE(!irqs_disabled());
1032
1033         if (!tr->allocated_snapshot) {
1034                 /* Only the nop tracer should hit this when disabling */
1035                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036                 return;
1037         }
1038
1039         arch_spin_lock(&ftrace_max_lock);
1040
1041         buf = tr->trace_buffer.buffer;
1042         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043         tr->max_buffer.buffer = buf;
1044
1045         __update_max_tr(tr, tsk, cpu);
1046         arch_spin_unlock(&ftrace_max_lock);
1047 }
1048
1049 /**
1050  * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr: tracer
1052  * @tsk: task with the latency
1053  * @cpu: the cpu of the buffer to copy.
1054  *
1055  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056  */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060         int ret;
1061
1062         if (tr->stop_count)
1063                 return;
1064
1065         WARN_ON_ONCE(!irqs_disabled());
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&ftrace_max_lock);
1073
1074         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076         if (ret == -EBUSY) {
1077                 /*
1078                  * We failed to swap the buffer due to a commit taking
1079                  * place on this CPU. We fail to record, but we reset
1080                  * the max trace buffer (no one writes directly to it)
1081                  * and flag that it failed.
1082                  */
1083                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084                         "Failed to swap buffers due to commit in progress\n");
1085         }
1086
1087         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089         __update_max_tr(tr, tsk, cpu);
1090         arch_spin_unlock(&ftrace_max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static void default_wait_pipe(struct trace_iterator *iter)
1095 {
1096         /* Iterators are static, they should be filled or empty */
1097         if (trace_buffer_iter(iter, iter->cpu_file))
1098                 return;
1099
1100         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1101 }
1102
1103 #ifdef CONFIG_FTRACE_STARTUP_TEST
1104 static int run_tracer_selftest(struct tracer *type)
1105 {
1106         struct trace_array *tr = &global_trace;
1107         struct tracer *saved_tracer = tr->current_trace;
1108         int ret;
1109
1110         if (!type->selftest || tracing_selftest_disabled)
1111                 return 0;
1112
1113         /*
1114          * Run a selftest on this tracer.
1115          * Here we reset the trace buffer, and set the current
1116          * tracer to be this tracer. The tracer can then run some
1117          * internal tracing to verify that everything is in order.
1118          * If we fail, we do not register this tracer.
1119          */
1120         tracing_reset_online_cpus(&tr->trace_buffer);
1121
1122         tr->current_trace = type;
1123
1124 #ifdef CONFIG_TRACER_MAX_TRACE
1125         if (type->use_max_tr) {
1126                 /* If we expanded the buffers, make sure the max is expanded too */
1127                 if (ring_buffer_expanded)
1128                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1129                                            RING_BUFFER_ALL_CPUS);
1130                 tr->allocated_snapshot = true;
1131         }
1132 #endif
1133
1134         /* the test is responsible for initializing and enabling */
1135         pr_info("Testing tracer %s: ", type->name);
1136         ret = type->selftest(type, tr);
1137         /* the test is responsible for resetting too */
1138         tr->current_trace = saved_tracer;
1139         if (ret) {
1140                 printk(KERN_CONT "FAILED!\n");
1141                 /* Add the warning after printing 'FAILED' */
1142                 WARN_ON(1);
1143                 return -1;
1144         }
1145         /* Only reset on passing, to avoid touching corrupted buffers */
1146         tracing_reset_online_cpus(&tr->trace_buffer);
1147
1148 #ifdef CONFIG_TRACER_MAX_TRACE
1149         if (type->use_max_tr) {
1150                 tr->allocated_snapshot = false;
1151
1152                 /* Shrink the max buffer again */
1153                 if (ring_buffer_expanded)
1154                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1155                                            RING_BUFFER_ALL_CPUS);
1156         }
1157 #endif
1158
1159         printk(KERN_CONT "PASSED\n");
1160         return 0;
1161 }
1162 #else
1163 static inline int run_tracer_selftest(struct tracer *type)
1164 {
1165         return 0;
1166 }
1167 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1168
1169 /**
1170  * register_tracer - register a tracer with the ftrace system.
1171  * @type - the plugin for the tracer
1172  *
1173  * Register a new plugin tracer.
1174  */
1175 int register_tracer(struct tracer *type)
1176 {
1177         struct tracer *t;
1178         int ret = 0;
1179
1180         if (!type->name) {
1181                 pr_info("Tracer must have a name\n");
1182                 return -1;
1183         }
1184
1185         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1186                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1187                 return -1;
1188         }
1189
1190         mutex_lock(&trace_types_lock);
1191
1192         tracing_selftest_running = true;
1193
1194         for (t = trace_types; t; t = t->next) {
1195                 if (strcmp(type->name, t->name) == 0) {
1196                         /* already found */
1197                         pr_info("Tracer %s already registered\n",
1198                                 type->name);
1199                         ret = -1;
1200                         goto out;
1201                 }
1202         }
1203
1204         if (!type->set_flag)
1205                 type->set_flag = &dummy_set_flag;
1206         if (!type->flags)
1207                 type->flags = &dummy_tracer_flags;
1208         else
1209                 if (!type->flags->opts)
1210                         type->flags->opts = dummy_tracer_opt;
1211         if (!type->wait_pipe)
1212                 type->wait_pipe = default_wait_pipe;
1213
1214         ret = run_tracer_selftest(type);
1215         if (ret < 0)
1216                 goto out;
1217
1218         type->next = trace_types;
1219         trace_types = type;
1220
1221  out:
1222         tracing_selftest_running = false;
1223         mutex_unlock(&trace_types_lock);
1224
1225         if (ret || !default_bootup_tracer)
1226                 goto out_unlock;
1227
1228         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1229                 goto out_unlock;
1230
1231         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1232         /* Do we want this tracer to start on bootup? */
1233         tracing_set_tracer(type->name);
1234         default_bootup_tracer = NULL;
1235         /* Disable other selftests, since this will break them. */
1236         tracing_selftest_disabled = true;
1237 #ifdef CONFIG_FTRACE_STARTUP_TEST
1238         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1239                type->name);
1240 #endif
1241
1242  out_unlock:
1243         return ret;
1244 }
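/*
 * Illustrative registration sketch (hypothetical tracer plugin): a
 * tracer supplies at least a name and an init callback; anything left
 * unset falls back to the dummy flags/callbacks filled in above.
 *
 *   static struct tracer example_tracer __read_mostly = {
 *           .name   = "example",
 *           .init   = example_tracer_init,
 *           .reset  = example_tracer_reset,
 *   };
 *
 *   ...
 *           return register_tracer(&example_tracer);
 */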
1245
1246 void tracing_reset(struct trace_buffer *buf, int cpu)
1247 {
1248         struct ring_buffer *buffer = buf->buffer;
1249
1250         if (!buffer)
1251                 return;
1252
1253         ring_buffer_record_disable(buffer);
1254
1255         /* Make sure all commits have finished */
1256         synchronize_sched();
1257         ring_buffer_reset_cpu(buffer, cpu);
1258
1259         ring_buffer_record_enable(buffer);
1260 }
1261
1262 void tracing_reset_online_cpus(struct trace_buffer *buf)
1263 {
1264         struct ring_buffer *buffer = buf->buffer;
1265         int cpu;
1266
1267         if (!buffer)
1268                 return;
1269
1270         ring_buffer_record_disable(buffer);
1271
1272         /* Make sure all commits have finished */
1273         synchronize_sched();
1274
1275         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1276
1277         for_each_online_cpu(cpu)
1278                 ring_buffer_reset_cpu(buffer, cpu);
1279
1280         ring_buffer_record_enable(buffer);
1281 }
1282
1283 /* Must have trace_types_lock held */
1284 void tracing_reset_all_online_cpus(void)
1285 {
1286         struct trace_array *tr;
1287
1288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1289                 tracing_reset_online_cpus(&tr->trace_buffer);
1290 #ifdef CONFIG_TRACER_MAX_TRACE
1291                 tracing_reset_online_cpus(&tr->max_buffer);
1292 #endif
1293         }
1294 }
1295
1296 #define SAVED_CMDLINES 128
1297 #define NO_CMDLINE_MAP UINT_MAX
1298 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1299 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1300 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1301 static int cmdline_idx;
1302 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1303
1304 /* temporarily disable recording */
1305 static atomic_t trace_record_cmdline_disabled __read_mostly;
1306
1307 static void trace_init_cmdlines(void)
1308 {
1309         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1310         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1311         cmdline_idx = 0;
1312 }
1313
1314 int is_tracing_stopped(void)
1315 {
1316         return global_trace.stop_count;
1317 }
1318
1319 /**
1320  * tracing_start - quick start of the tracer
1321  *
1322  * If tracing is enabled but was stopped by tracing_stop,
1323  * this will start the tracer back up.
1324  */
1325 void tracing_start(void)
1326 {
1327         struct ring_buffer *buffer;
1328         unsigned long flags;
1329
1330         if (tracing_disabled)
1331                 return;
1332
1333         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1334         if (--global_trace.stop_count) {
1335                 if (global_trace.stop_count < 0) {
1336                         /* Someone screwed up their debugging */
1337                         WARN_ON_ONCE(1);
1338                         global_trace.stop_count = 0;
1339                 }
1340                 goto out;
1341         }
1342
1343         /* Prevent the buffers from switching */
1344         arch_spin_lock(&ftrace_max_lock);
1345
1346         buffer = global_trace.trace_buffer.buffer;
1347         if (buffer)
1348                 ring_buffer_record_enable(buffer);
1349
1350 #ifdef CONFIG_TRACER_MAX_TRACE
1351         buffer = global_trace.max_buffer.buffer;
1352         if (buffer)
1353                 ring_buffer_record_enable(buffer);
1354 #endif
1355
1356         arch_spin_unlock(&ftrace_max_lock);
1357
1358         ftrace_start();
1359  out:
1360         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1361 }
1362
1363 static void tracing_start_tr(struct trace_array *tr)
1364 {
1365         struct ring_buffer *buffer;
1366         unsigned long flags;
1367
1368         if (tracing_disabled)
1369                 return;
1370
1371         /* If global, we need to also start the max tracer */
1372         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1373                 return tracing_start();
1374
1375         raw_spin_lock_irqsave(&tr->start_lock, flags);
1376
1377         if (--tr->stop_count) {
1378                 if (tr->stop_count < 0) {
1379                         /* Someone screwed up their debugging */
1380                         WARN_ON_ONCE(1);
1381                         tr->stop_count = 0;
1382                 }
1383                 goto out;
1384         }
1385
1386         buffer = tr->trace_buffer.buffer;
1387         if (buffer)
1388                 ring_buffer_record_enable(buffer);
1389
1390  out:
1391         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1392 }
1393
1394 /**
1395  * tracing_stop - quick stop of the tracer
1396  *
1397  * Lightweight way to stop tracing. Use in conjunction with
1398  * tracing_start.
1399  */
1400 void tracing_stop(void)
1401 {
1402         struct ring_buffer *buffer;
1403         unsigned long flags;
1404
1405         ftrace_stop();
1406         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1407         if (global_trace.stop_count++)
1408                 goto out;
1409
1410         /* Prevent the buffers from switching */
1411         arch_spin_lock(&ftrace_max_lock);
1412
1413         buffer = global_trace.trace_buffer.buffer;
1414         if (buffer)
1415                 ring_buffer_record_disable(buffer);
1416
1417 #ifdef CONFIG_TRACER_MAX_TRACE
1418         buffer = global_trace.max_buffer.buffer;
1419         if (buffer)
1420                 ring_buffer_record_disable(buffer);
1421 #endif
1422
1423         arch_spin_unlock(&ftrace_max_lock);
1424
1425  out:
1426         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1427 }
1428
1429 static void tracing_stop_tr(struct trace_array *tr)
1430 {
1431         struct ring_buffer *buffer;
1432         unsigned long flags;
1433
1434         /* If global, we need to also stop the max tracer */
1435         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1436                 return tracing_stop();
1437
1438         raw_spin_lock_irqsave(&tr->start_lock, flags);
1439         if (tr->stop_count++)
1440                 goto out;
1441
1442         buffer = tr->trace_buffer.buffer;
1443         if (buffer)
1444                 ring_buffer_record_disable(buffer);
1445
1446  out:
1447         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1448 }
1449
1450 void trace_stop_cmdline_recording(void);
1451
1452 static void trace_save_cmdline(struct task_struct *tsk)
1453 {
1454         unsigned pid, idx;
1455
1456         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1457                 return;
1458
1459         /*
1460          * It's not the end of the world if we don't get
1461          * the lock, but we also don't want to spin
1462          * nor do we want to disable interrupts,
1463          * so if we miss here, then better luck next time.
1464          */
1465         if (!arch_spin_trylock(&trace_cmdline_lock))
1466                 return;
1467
1468         idx = map_pid_to_cmdline[tsk->pid];
1469         if (idx == NO_CMDLINE_MAP) {
1470                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1471
1472                 /*
1473                  * Check whether the cmdline buffer at idx has a pid
1474                  * mapped. We are going to overwrite that entry so we
1475                  * need to clear the map_pid_to_cmdline. Otherwise we
1476                  * would read the new comm for the old pid.
1477                  */
1478                 pid = map_cmdline_to_pid[idx];
1479                 if (pid != NO_CMDLINE_MAP)
1480                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1481
1482                 map_cmdline_to_pid[idx] = tsk->pid;
1483                 map_pid_to_cmdline[tsk->pid] = idx;
1484
1485                 cmdline_idx = idx;
1486         }
1487
1488         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1489
1490         arch_spin_unlock(&trace_cmdline_lock);
1491 }
1492
1493 void trace_find_cmdline(int pid, char comm[])
1494 {
1495         unsigned map;
1496
1497         if (!pid) {
1498                 strcpy(comm, "<idle>");
1499                 return;
1500         }
1501
1502         if (WARN_ON_ONCE(pid < 0)) {
1503                 strcpy(comm, "<XXX>");
1504                 return;
1505         }
1506
1507         if (pid > PID_MAX_DEFAULT) {
1508                 strcpy(comm, "<...>");
1509                 return;
1510         }
1511
1512         preempt_disable();
1513         arch_spin_lock(&trace_cmdline_lock);
1514         map = map_pid_to_cmdline[pid];
1515         if (map != NO_CMDLINE_MAP)
1516                 strcpy(comm, saved_cmdlines[map]);
1517         else
1518                 strcpy(comm, "<...>");
1519
1520         arch_spin_unlock(&trace_cmdline_lock);
1521         preempt_enable();
1522 }
1523
1524 void tracing_record_cmdline(struct task_struct *tsk)
1525 {
1526         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1527                 return;
1528
1529         if (!__this_cpu_read(trace_cmdline_save))
1530                 return;
1531
1532         __this_cpu_write(trace_cmdline_save, false);
1533
1534         trace_save_cmdline(tsk);
1535 }
1536
1537 void
1538 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1539                              int pc)
1540 {
1541         struct task_struct *tsk = current;
1542
1543         entry->preempt_count            = pc & 0xff;
1544         entry->pid                      = (tsk) ? tsk->pid : 0;
1545         entry->flags =
1546 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1547                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1548 #else
1549                 TRACE_FLAG_IRQS_NOSUPPORT |
1550 #endif
1551                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1552                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1553                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1554                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1557
1558 struct ring_buffer_event *
1559 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1560                           int type,
1561                           unsigned long len,
1562                           unsigned long flags, int pc)
1563 {
1564         struct ring_buffer_event *event;
1565
1566         event = ring_buffer_lock_reserve(buffer, len);
1567         if (event != NULL) {
1568                 struct trace_entry *ent = ring_buffer_event_data(event);
1569
1570                 tracing_generic_entry_update(ent, flags, pc);
1571                 ent->type = type;
1572         }
1573
1574         return event;
1575 }
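/*
 * Typical reserve/fill/commit pattern around the helper above, as used
 * by trace_function() and the other event writers in this file (sketch
 * of existing usage, not a new interface):
 *
 *   event = trace_buffer_lock_reserve(buffer, type, sizeof(*entry),
 *                                     flags, pc);
 *   if (!event)
 *           return;
 *   entry = ring_buffer_event_data(event);
 *   ... fill in *entry ...
 *   __buffer_unlock_commit(buffer, event);
 */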
1576
1577 void
1578 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1579 {
1580         __this_cpu_write(trace_cmdline_save, true);
1581         ring_buffer_unlock_commit(buffer, event);
1582 }
1583
1584 static inline void
1585 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1586                              struct ring_buffer_event *event,
1587                              unsigned long flags, int pc)
1588 {
1589         __buffer_unlock_commit(buffer, event);
1590
1591         ftrace_trace_stack(buffer, flags, 6, pc);
1592         ftrace_trace_userstack(buffer, flags, pc);
1593 }
1594
1595 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1596                                 struct ring_buffer_event *event,
1597                                 unsigned long flags, int pc)
1598 {
1599         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1600 }
1601 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1602
1603 struct ring_buffer_event *
1604 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1605                           struct ftrace_event_file *ftrace_file,
1606                           int type, unsigned long len,
1607                           unsigned long flags, int pc)
1608 {
1609         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1610         return trace_buffer_lock_reserve(*current_rb,
1611                                          type, len, flags, pc);
1612 }
1613 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1614
1615 struct ring_buffer_event *
1616 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1617                                   int type, unsigned long len,
1618                                   unsigned long flags, int pc)
1619 {
1620         *current_rb = global_trace.trace_buffer.buffer;
1621         return trace_buffer_lock_reserve(*current_rb,
1622                                          type, len, flags, pc);
1623 }
1624 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1625
1626 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1627                                         struct ring_buffer_event *event,
1628                                         unsigned long flags, int pc)
1629 {
1630         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1631 }
1632 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1633
1634 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1635                                      struct ring_buffer_event *event,
1636                                      unsigned long flags, int pc,
1637                                      struct pt_regs *regs)
1638 {
1639         __buffer_unlock_commit(buffer, event);
1640
1641         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1642         ftrace_trace_userstack(buffer, flags, pc);
1643 }
1644 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1645
1646 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1647                                          struct ring_buffer_event *event)
1648 {
1649         ring_buffer_discard_commit(buffer, event);
1650 }
1651 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1652
1653 void
1654 trace_function(struct trace_array *tr,
1655                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1656                int pc)
1657 {
1658         struct ftrace_event_call *call = &event_function;
1659         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1660         struct ring_buffer_event *event;
1661         struct ftrace_entry *entry;
1662
1663         /* If we are reading the ring buffer, don't trace */
1664         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1665                 return;
1666
1667         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1668                                           flags, pc);
1669         if (!event)
1670                 return;
1671         entry   = ring_buffer_event_data(event);
1672         entry->ip                       = ip;
1673         entry->parent_ip                = parent_ip;
1674
1675         if (!call_filter_check_discard(call, entry, buffer, event))
1676                 __buffer_unlock_commit(buffer, event);
1677 }
1678
1679 #ifdef CONFIG_STACKTRACE
1680
1681 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1682 struct ftrace_stack {
1683         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1684 };
1685
1686 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1687 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1688
1689 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1690                                  unsigned long flags,
1691                                  int skip, int pc, struct pt_regs *regs)
1692 {
1693         struct ftrace_event_call *call = &event_kernel_stack;
1694         struct ring_buffer_event *event;
1695         struct stack_entry *entry;
1696         struct stack_trace trace;
1697         int use_stack;
1698         int size = FTRACE_STACK_ENTRIES;
1699
1700         trace.nr_entries        = 0;
1701         trace.skip              = skip;
1702
1703         /*
1704          * Since events can happen in NMIs there's no safe way to
1705          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1706          * or NMI comes in, it will just have to use the default
1707          * FTRACE_STACK_ENTRIES.
1708          */
1709         preempt_disable_notrace();
1710
1711         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1712         /*
1713          * We don't need any atomic variables, just a barrier.
1714          * If an interrupt comes in, we don't care, because it would
1715          * have exited and put the counter back to what we want.
1716          * We just need a barrier to keep gcc from moving things
1717          * around.
1718          */
1719         barrier();
1720         if (use_stack == 1) {
1721                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1722                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1723
1724                 if (regs)
1725                         save_stack_trace_regs(regs, &trace);
1726                 else
1727                         save_stack_trace(&trace);
1728
1729                 if (trace.nr_entries > size)
1730                         size = trace.nr_entries;
1731         } else
1732                 /* From now on, use_stack is a boolean */
1733                 use_stack = 0;
1734
1735         size *= sizeof(unsigned long);
1736
1737         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1738                                           sizeof(*entry) + size, flags, pc);
1739         if (!event)
1740                 goto out;
1741         entry = ring_buffer_event_data(event);
1742
1743         memset(&entry->caller, 0, size);
1744
1745         if (use_stack)
1746                 memcpy(&entry->caller, trace.entries,
1747                        trace.nr_entries * sizeof(unsigned long));
1748         else {
1749                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1750                 trace.entries           = entry->caller;
1751                 if (regs)
1752                         save_stack_trace_regs(regs, &trace);
1753                 else
1754                         save_stack_trace(&trace);
1755         }
1756
1757         entry->size = trace.nr_entries;
1758
1759         if (!call_filter_check_discard(call, entry, buffer, event))
1760                 __buffer_unlock_commit(buffer, event);
1761
1762  out:
1763         /* Again, don't let gcc optimize things here */
1764         barrier();
1765         __this_cpu_dec(ftrace_stack_reserve);
1766         preempt_enable_notrace();
1767
1768 }
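/*
 * Illustrative sketch: the reservation trick used in
 * __ftrace_trace_stack() above, reduced to its core.  "my_scratch_busy"
 * is a hypothetical per-cpu nesting counter, not something defined in
 * this file.
 */
#if 0
static DEFINE_PER_CPU(int, my_scratch_busy);

static void use_scratch_safely(void)
{
	int nesting;

	preempt_disable_notrace();
	nesting = __this_cpu_inc_return(my_scratch_busy);
	barrier();		/* keep gcc from reordering around the use */
	if (nesting == 1) {
		/* sole user on this CPU: the big per-cpu stack is ours */
	} else {
		/* nested irq/NMI user: fall back to the small local copy */
	}
	barrier();
	__this_cpu_dec(my_scratch_busy);
	preempt_enable_notrace();
}
#endif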
1769
1770 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1771                              int skip, int pc, struct pt_regs *regs)
1772 {
1773         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1774                 return;
1775
1776         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1777 }
1778
1779 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1780                         int skip, int pc)
1781 {
1782         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1783                 return;
1784
1785         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1786 }
1787
1788 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1789                    int pc)
1790 {
1791         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1792 }
1793
1794 /**
1795  * trace_dump_stack - record a stack back trace in the trace buffer
1796  * @skip: Number of functions to skip (helper handlers)
1797  */
1798 void trace_dump_stack(int skip)
1799 {
1800         unsigned long flags;
1801
1802         if (tracing_disabled || tracing_selftest_running)
1803                 return;
1804
1805         local_save_flags(flags);
1806
1807         /*
1808          * Skip 3 more, seems to get us at the caller of
1809          * this function.
1810          */
1811         skip += 3;
1812         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1813                              flags, skip, preempt_count(), NULL);
1814 }
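/*
 * Hypothetical call site (illustrative only): a driver could record how
 * it reached an unexpected state; the back trace lands in the ftrace
 * ring buffer rather than the printk log.  queue_is_corrupt() is an
 * assumed helper, not a real API.
 */
#if 0
	if (unlikely(queue_is_corrupt(q)))
		trace_dump_stack(0);	/* 0: skip no additional callers */
#endif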
1815
1816 static DEFINE_PER_CPU(int, user_stack_count);
1817
1818 void
1819 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1820 {
1821         struct ftrace_event_call *call = &event_user_stack;
1822         struct ring_buffer_event *event;
1823         struct userstack_entry *entry;
1824         struct stack_trace trace;
1825
1826         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1827                 return;
1828
1829         /*
1830          * NMIs cannot handle page faults, even with fixups.
1831          * Saving the user stack can (and often does) fault.
1832          */
1833         if (unlikely(in_nmi()))
1834                 return;
1835
1836         /*
1837          * prevent recursion, since the user stack tracing may
1838          * trigger other kernel events.
1839          */
1840         preempt_disable();
1841         if (__this_cpu_read(user_stack_count))
1842                 goto out;
1843
1844         __this_cpu_inc(user_stack_count);
1845
1846         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1847                                           sizeof(*entry), flags, pc);
1848         if (!event)
1849                 goto out_drop_count;
1850         entry   = ring_buffer_event_data(event);
1851
1852         entry->tgid             = current->tgid;
1853         memset(&entry->caller, 0, sizeof(entry->caller));
1854
1855         trace.nr_entries        = 0;
1856         trace.max_entries       = FTRACE_STACK_ENTRIES;
1857         trace.skip              = 0;
1858         trace.entries           = entry->caller;
1859
1860         save_stack_trace_user(&trace);
1861         if (!call_filter_check_discard(call, entry, buffer, event))
1862                 __buffer_unlock_commit(buffer, event);
1863
1864  out_drop_count:
1865         __this_cpu_dec(user_stack_count);
1866  out:
1867         preempt_enable();
1868 }
1869
1870 #ifdef UNUSED
1871 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1872 {
1873         ftrace_trace_userstack(tr, flags, preempt_count());
1874 }
1875 #endif /* UNUSED */
1876
1877 #endif /* CONFIG_STACKTRACE */
1878
1879 /* created for use with alloc_percpu */
1880 struct trace_buffer_struct {
1881         char buffer[TRACE_BUF_SIZE];
1882 };
1883
1884 static struct trace_buffer_struct *trace_percpu_buffer;
1885 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1886 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1887 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1888
1889 /*
1890  * The buffer used is dependent on the context. There is a per cpu
1891  * buffer for normal context, softirq context, hard irq context and
1892  * for NMI context. This allows for lockless recording.
1893  *
1894  * Note, if the buffers failed to be allocated, then this returns NULL
1895  */
1896 static char *get_trace_buf(void)
1897 {
1898         struct trace_buffer_struct *percpu_buffer;
1899
1900         /*
1901          * If we have allocated per cpu buffers, then we do not
1902          * need to do any locking.
1903          */
1904         if (in_nmi())
1905                 percpu_buffer = trace_percpu_nmi_buffer;
1906         else if (in_irq())
1907                 percpu_buffer = trace_percpu_irq_buffer;
1908         else if (in_softirq())
1909                 percpu_buffer = trace_percpu_sirq_buffer;
1910         else
1911                 percpu_buffer = trace_percpu_buffer;
1912
1913         if (!percpu_buffer)
1914                 return NULL;
1915
1916         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1917 }
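/*
 * Hedged caller sketch: the pointer returned by get_trace_buf() is a
 * per-cpu buffer, so it is only safe to use while preemption stays
 * disabled -- which is why trace_vbprintk() and __trace_array_vprintk()
 * below bracket the call with preempt_disable_notrace() /
 * preempt_enable_notrace().
 */
#if 0
	char *buf;

	preempt_disable_notrace();
	buf = get_trace_buf();
	if (buf) {
		/* format at most TRACE_BUF_SIZE bytes into buf */
	}
	preempt_enable_notrace();
#endif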
1918
1919 static int alloc_percpu_trace_buffer(void)
1920 {
1921         struct trace_buffer_struct *buffers;
1922         struct trace_buffer_struct *sirq_buffers;
1923         struct trace_buffer_struct *irq_buffers;
1924         struct trace_buffer_struct *nmi_buffers;
1925
1926         buffers = alloc_percpu(struct trace_buffer_struct);
1927         if (!buffers)
1928                 goto err_warn;
1929
1930         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1931         if (!sirq_buffers)
1932                 goto err_sirq;
1933
1934         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1935         if (!irq_buffers)
1936                 goto err_irq;
1937
1938         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1939         if (!nmi_buffers)
1940                 goto err_nmi;
1941
1942         trace_percpu_buffer = buffers;
1943         trace_percpu_sirq_buffer = sirq_buffers;
1944         trace_percpu_irq_buffer = irq_buffers;
1945         trace_percpu_nmi_buffer = nmi_buffers;
1946
1947         return 0;
1948
1949  err_nmi:
1950         free_percpu(irq_buffers);
1951  err_irq:
1952         free_percpu(sirq_buffers);
1953  err_sirq:
1954         free_percpu(buffers);
1955  err_warn:
1956         WARN(1, "Could not allocate percpu trace_printk buffer");
1957         return -ENOMEM;
1958 }
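/*
 * Illustrative sketch of the same unwind-on-error goto style with just
 * two allocations; alloc_two() is hypothetical.
 */
#if 0
static int alloc_two(void)
{
	struct trace_buffer_struct *a, *b;

	a = alloc_percpu(struct trace_buffer_struct);
	if (!a)
		goto err_a;

	b = alloc_percpu(struct trace_buffer_struct);
	if (!b)
		goto err_b;

	/* publish a and b to their global pointers here */
	return 0;

 err_b:
	free_percpu(a);
 err_a:
	return -ENOMEM;
}
#endif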
1959
1960 static int buffers_allocated;
1961
1962 void trace_printk_init_buffers(void)
1963 {
1964         if (buffers_allocated)
1965                 return;
1966
1967         if (alloc_percpu_trace_buffer())
1968                 return;
1969
1970         pr_info("ftrace: Allocated trace_printk buffers\n");
1971
1972         /* Expand the buffers to set size */
1973         tracing_update_buffers();
1974
1975         buffers_allocated = 1;
1976
1977         /*
1978          * trace_printk_init_buffers() can be called by modules.
1979          * If that happens, then we need to start cmdline recording
1980          * directly here. If the global_trace.buffer is already
1981          * allocated here, then this was called by module code.
1982          */
1983         if (global_trace.trace_buffer.buffer)
1984                 tracing_start_cmdline_record();
1985 }
1986
1987 void trace_printk_start_comm(void)
1988 {
1989         /* Start tracing comms if trace printk is set */
1990         if (!buffers_allocated)
1991                 return;
1992         tracing_start_cmdline_record();
1993 }
1994
1995 static void trace_printk_start_stop_comm(int enabled)
1996 {
1997         if (!buffers_allocated)
1998                 return;
1999
2000         if (enabled)
2001                 tracing_start_cmdline_record();
2002         else
2003                 tracing_stop_cmdline_record();
2004 }
2005
2006 /**
2007  * trace_vbprintk - write binary msg to tracing buffer
2008  *
2009  */
2010 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2011 {
2012         struct ftrace_event_call *call = &event_bprint;
2013         struct ring_buffer_event *event;
2014         struct ring_buffer *buffer;
2015         struct trace_array *tr = &global_trace;
2016         struct bprint_entry *entry;
2017         unsigned long flags;
2018         char *tbuffer;
2019         int len = 0, size, pc;
2020
2021         if (unlikely(tracing_selftest_running || tracing_disabled))
2022                 return 0;
2023
2024         /* Don't pollute graph traces with trace_vprintk internals */
2025         pause_graph_tracing();
2026
2027         pc = preempt_count();
2028         preempt_disable_notrace();
2029
2030         tbuffer = get_trace_buf();
2031         if (!tbuffer) {
2032                 len = 0;
2033                 goto out;
2034         }
2035
2036         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2037
2038         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2039                 goto out;
2040
2041         local_save_flags(flags);
2042         size = sizeof(*entry) + sizeof(u32) * len;
2043         buffer = tr->trace_buffer.buffer;
2044         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2045                                           flags, pc);
2046         if (!event)
2047                 goto out;
2048         entry = ring_buffer_event_data(event);
2049         entry->ip                       = ip;
2050         entry->fmt                      = fmt;
2051
2052         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2053         if (!call_filter_check_discard(call, entry, buffer, event)) {
2054                 __buffer_unlock_commit(buffer, event);
2055                 ftrace_trace_stack(buffer, flags, 6, pc);
2056         }
2057
2058 out:
2059         preempt_enable_notrace();
2060         unpause_graph_tracing();
2061
2062         return len;
2063 }
2064 EXPORT_SYMBOL_GPL(trace_vbprintk);
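/*
 * Hedged usage note: with a compile-time-constant format string,
 * trace_printk() normally ends up here (via __trace_bprintk()), storing
 * only the format pointer plus the raw argument words; the actual
 * formatting is deferred until the buffer is read.  The variables in the
 * sketch are hypothetical.
 */
#if 0
	trace_printk("resetting queue %d, len=%lu\n", qid, len);
#endif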
2065
2066 static int
2067 __trace_array_vprintk(struct ring_buffer *buffer,
2068                       unsigned long ip, const char *fmt, va_list args)
2069 {
2070         struct ftrace_event_call *call = &event_print;
2071         struct ring_buffer_event *event;
2072         int len = 0, size, pc;
2073         struct print_entry *entry;
2074         unsigned long flags;
2075         char *tbuffer;
2076
2077         if (tracing_disabled || tracing_selftest_running)
2078                 return 0;
2079
2080         /* Don't pollute graph traces with trace_vprintk internals */
2081         pause_graph_tracing();
2082
2083         pc = preempt_count();
2084         preempt_disable_notrace();
2085
2086
2087         tbuffer = get_trace_buf();
2088         if (!tbuffer) {
2089                 len = 0;
2090                 goto out;
2091         }
2092
2093         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2094         if (len > TRACE_BUF_SIZE)
2095                 goto out;
2096
2097         local_save_flags(flags);
2098         size = sizeof(*entry) + len + 1;
2099         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2100                                           flags, pc);
2101         if (!event)
2102                 goto out;
2103         entry = ring_buffer_event_data(event);
2104         entry->ip = ip;
2105
2106         memcpy(&entry->buf, tbuffer, len);
2107         entry->buf[len] = '\0';
2108         if (!call_filter_check_discard(call, entry, buffer, event)) {
2109                 __buffer_unlock_commit(buffer, event);
2110                 ftrace_trace_stack(buffer, flags, 6, pc);
2111         }
2112  out:
2113         preempt_enable_notrace();
2114         unpause_graph_tracing();
2115
2116         return len;
2117 }
2118
2119 int trace_array_vprintk(struct trace_array *tr,
2120                         unsigned long ip, const char *fmt, va_list args)
2121 {
2122         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2123 }
2124
2125 int trace_array_printk(struct trace_array *tr,
2126                        unsigned long ip, const char *fmt, ...)
2127 {
2128         int ret;
2129         va_list ap;
2130
2131         if (!(trace_flags & TRACE_ITER_PRINTK))
2132                 return 0;
2133
2134         va_start(ap, fmt);
2135         ret = trace_array_vprintk(tr, ip, fmt, ap);
2136         va_end(ap);
2137         return ret;
2138 }
2139
2140 int trace_array_printk_buf(struct ring_buffer *buffer,
2141                            unsigned long ip, const char *fmt, ...)
2142 {
2143         int ret;
2144         va_list ap;
2145
2146         if (!(trace_flags & TRACE_ITER_PRINTK))
2147                 return 0;
2148
2149         va_start(ap, fmt);
2150         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2151         va_end(ap);
2152         return ret;
2153 }
2154
2155 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2156 {
2157         return trace_array_vprintk(&global_trace, ip, fmt, args);
2158 }
2159 EXPORT_SYMBOL_GPL(trace_vprintk);
2160
2161 static void trace_iterator_increment(struct trace_iterator *iter)
2162 {
2163         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2164
2165         iter->idx++;
2166         if (buf_iter)
2167                 ring_buffer_read(buf_iter, NULL);
2168 }
2169
2170 static struct trace_entry *
2171 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2172                 unsigned long *lost_events)
2173 {
2174         struct ring_buffer_event *event;
2175         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2176
2177         if (buf_iter)
2178                 event = ring_buffer_iter_peek(buf_iter, ts);
2179         else
2180                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2181                                          lost_events);
2182
2183         if (event) {
2184                 iter->ent_size = ring_buffer_event_length(event);
2185                 return ring_buffer_event_data(event);
2186         }
2187         iter->ent_size = 0;
2188         return NULL;
2189 }
2190
2191 static struct trace_entry *
2192 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2193                   unsigned long *missing_events, u64 *ent_ts)
2194 {
2195         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2196         struct trace_entry *ent, *next = NULL;
2197         unsigned long lost_events = 0, next_lost = 0;
2198         int cpu_file = iter->cpu_file;
2199         u64 next_ts = 0, ts;
2200         int next_cpu = -1;
2201         int next_size = 0;
2202         int cpu;
2203
2204         /*
2205          * If we are in a per_cpu trace file, don't bother iterating over
2206          * all CPUs; peek at that CPU directly.
2207          */
2208         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2209                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2210                         return NULL;
2211                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2212                 if (ent_cpu)
2213                         *ent_cpu = cpu_file;
2214
2215                 return ent;
2216         }
2217
2218         for_each_tracing_cpu(cpu) {
2219
2220                 if (ring_buffer_empty_cpu(buffer, cpu))
2221                         continue;
2222
2223                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2224
2225                 /*
2226                  * Pick the entry with the smallest timestamp:
2227                  */
2228                 if (ent && (!next || ts < next_ts)) {
2229                         next = ent;
2230                         next_cpu = cpu;
2231                         next_ts = ts;
2232                         next_lost = lost_events;
2233                         next_size = iter->ent_size;
2234                 }
2235         }
2236
2237         iter->ent_size = next_size;
2238
2239         if (ent_cpu)
2240                 *ent_cpu = next_cpu;
2241
2242         if (ent_ts)
2243                 *ent_ts = next_ts;
2244
2245         if (missing_events)
2246                 *missing_events = next_lost;
2247
2248         return next;
2249 }
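/*
 * Illustrative sketch: the loop above is an N-way merge that repeatedly
 * picks the pending event with the smallest timestamp.  Hypothetical
 * plain-array version, where a zero timestamp stands for "nothing
 * pending on this CPU":
 */
#if 0
static int pick_oldest(const u64 *next_ts, int nr_cpus)
{
	int cpu, winner = -1;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (!next_ts[cpu])
			continue;
		if (winner < 0 || next_ts[cpu] < next_ts[winner])
			winner = cpu;
	}
	return winner;	/* -1 when every per-cpu buffer is empty */
}
#endif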
2250
2251 /* Find the next real entry, without updating the iterator itself */
2252 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2253                                           int *ent_cpu, u64 *ent_ts)
2254 {
2255         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2256 }
2257
2258 /* Find the next real entry, and increment the iterator to the next entry */
2259 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2260 {
2261         iter->ent = __find_next_entry(iter, &iter->cpu,
2262                                       &iter->lost_events, &iter->ts);
2263
2264         if (iter->ent)
2265                 trace_iterator_increment(iter);
2266
2267         return iter->ent ? iter : NULL;
2268 }
2269
2270 static void trace_consume(struct trace_iterator *iter)
2271 {
2272         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2273                             &iter->lost_events);
2274 }
2275
2276 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2277 {
2278         struct trace_iterator *iter = m->private;
2279         int i = (int)*pos;
2280         void *ent;
2281
2282         WARN_ON_ONCE(iter->leftover);
2283
2284         (*pos)++;
2285
2286         /* can't go backwards */
2287         if (iter->idx > i)
2288                 return NULL;
2289
2290         if (iter->idx < 0)
2291                 ent = trace_find_next_entry_inc(iter);
2292         else
2293                 ent = iter;
2294
2295         while (ent && iter->idx < i)
2296                 ent = trace_find_next_entry_inc(iter);
2297
2298         iter->pos = *pos;
2299
2300         return ent;
2301 }
2302
2303 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2304 {
2305         struct ring_buffer_event *event;
2306         struct ring_buffer_iter *buf_iter;
2307         unsigned long entries = 0;
2308         u64 ts;
2309
2310         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2311
2312         buf_iter = trace_buffer_iter(iter, cpu);
2313         if (!buf_iter)
2314                 return;
2315
2316         ring_buffer_iter_reset(buf_iter);
2317
2318         /*
2319          * With the max latency tracers it is possible that a reset
2320          * never took place on a cpu. This shows up as timestamps
2321          * that are before the start of the buffer.
2322          */
2323         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2324                 if (ts >= iter->trace_buffer->time_start)
2325                         break;
2326                 entries++;
2327                 ring_buffer_read(buf_iter, NULL);
2328         }
2329
2330         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2331 }
2332
2333 /*
2334  * The current tracer is copied to avoid taking a global lock
2335  * all around.
2336  */
2337 static void *s_start(struct seq_file *m, loff_t *pos)
2338 {
2339         struct trace_iterator *iter = m->private;
2340         struct trace_array *tr = iter->tr;
2341         int cpu_file = iter->cpu_file;
2342         void *p = NULL;
2343         loff_t l = 0;
2344         int cpu;
2345
2346         /*
2347          * copy the tracer to avoid using a global lock all around.
2348          * iter->trace is a copy of current_trace, the pointer to the
2349          * name may be used instead of a strcmp(), as iter->trace->name
2350          * will point to the same string as current_trace->name.
2351          */
2352         mutex_lock(&trace_types_lock);
2353         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2354                 *iter->trace = *tr->current_trace;
2355         mutex_unlock(&trace_types_lock);
2356
2357 #ifdef CONFIG_TRACER_MAX_TRACE
2358         if (iter->snapshot && iter->trace->use_max_tr)
2359                 return ERR_PTR(-EBUSY);
2360 #endif
2361
2362         if (!iter->snapshot)
2363                 atomic_inc(&trace_record_cmdline_disabled);
2364
2365         if (*pos != iter->pos) {
2366                 iter->ent = NULL;
2367                 iter->cpu = 0;
2368                 iter->idx = -1;
2369
2370                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2371                         for_each_tracing_cpu(cpu)
2372                                 tracing_iter_reset(iter, cpu);
2373                 } else
2374                         tracing_iter_reset(iter, cpu_file);
2375
2376                 iter->leftover = 0;
2377                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2378                         ;
2379
2380         } else {
2381                 /*
2382                  * If we overflowed the seq_file before, then we want
2383                  * to just reuse the trace_seq buffer again.
2384                  */
2385                 if (iter->leftover)
2386                         p = iter;
2387                 else {
2388                         l = *pos - 1;
2389                         p = s_next(m, p, &l);
2390                 }
2391         }
2392
2393         trace_event_read_lock();
2394         trace_access_lock(cpu_file);
2395         return p;
2396 }
2397
2398 static void s_stop(struct seq_file *m, void *p)
2399 {
2400         struct trace_iterator *iter = m->private;
2401
2402 #ifdef CONFIG_TRACER_MAX_TRACE
2403         if (iter->snapshot && iter->trace->use_max_tr)
2404                 return;
2405 #endif
2406
2407         if (!iter->snapshot)
2408                 atomic_dec(&trace_record_cmdline_disabled);
2409
2410         trace_access_unlock(iter->cpu_file);
2411         trace_event_read_unlock();
2412 }
2413
2414 static void
2415 get_total_entries(struct trace_buffer *buf,
2416                   unsigned long *total, unsigned long *entries)
2417 {
2418         unsigned long count;
2419         int cpu;
2420
2421         *total = 0;
2422         *entries = 0;
2423
2424         for_each_tracing_cpu(cpu) {
2425                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2426                 /*
2427                  * If this buffer has skipped entries, then we hold all
2428                  * entries for the trace and we need to ignore the
2429                  * ones before the time stamp.
2430                  */
2431                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2432                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2433                         /* total is the same as the entries */
2434                         *total += count;
2435                 } else
2436                         *total += count +
2437                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2438                 *entries += count;
2439         }
2440 }
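/*
 * Worked example (hypothetical numbers): a CPU wrote 1500 events into a
 * buffer that holds 1000, so 500 were overwritten (the overrun):
 *
 *	entries = 1000		events still present in the buffer
 *	total   = 1000 + 500	everything ever written (entries + overrun)
 *
 * When skipped_entries is set, the skipped events are subtracted and no
 * overrun is added, so total ends up equal to entries for that CPU.
 */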
2441
2442 static void print_lat_help_header(struct seq_file *m)
2443 {
2444         seq_puts(m, "#                  _------=> CPU#            \n");
2445         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2446         seq_puts(m, "#                | / _----=> need-resched    \n");
2447         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2448         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2449         seq_puts(m, "#                |||| /     delay             \n");
2450         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2451         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2452 }
2453
2454 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2455 {
2456         unsigned long total;
2457         unsigned long entries;
2458
2459         get_total_entries(buf, &total, &entries);
2460         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2461                    entries, total, num_online_cpus());
2462         seq_puts(m, "#\n");
2463 }
2464
2465 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2466 {
2467         print_event_info(buf, m);
2468         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2469         seq_puts(m, "#              | |       |          |         |\n");
2470 }
2471
2472 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2473 {
2474         print_event_info(buf, m);
2475         seq_puts(m, "#                              _-----=> irqs-off\n");
2476         seq_puts(m, "#                             / _----=> need-resched\n");
2477         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2478         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2479         seq_puts(m, "#                            ||| /     delay\n");
2480         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2481         seq_puts(m, "#              | |       |   ||||       |         |\n");
2482 }
2483
2484 void
2485 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2486 {
2487         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2488         struct trace_buffer *buf = iter->trace_buffer;
2489         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2490         struct tracer *type = iter->trace;
2491         unsigned long entries;
2492         unsigned long total;
2493         const char *name;
2494
2495         name = type->name;
2496
2497         get_total_entries(buf, &total, &entries);
2498
2499         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2500                    name, UTS_RELEASE);
2501         seq_puts(m, "# -----------------------------------"
2502                  "---------------------------------\n");
2503         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2504                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2505                    nsecs_to_usecs(data->saved_latency),
2506                    entries,
2507                    total,
2508                    buf->cpu,
2509 #if defined(CONFIG_PREEMPT_NONE)
2510                    "server",
2511 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2512                    "desktop",
2513 #elif defined(CONFIG_PREEMPT)
2514                    "preempt",
2515 #else
2516                    "unknown",
2517 #endif
2518                    /* These are reserved for later use */
2519                    0, 0, 0, 0);
2520 #ifdef CONFIG_SMP
2521         seq_printf(m, " #P:%d)\n", num_online_cpus());
2522 #else
2523         seq_puts(m, ")\n");
2524 #endif
2525         seq_puts(m, "#    -----------------\n");
2526         seq_printf(m, "#    | task: %.16s-%d "
2527                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2528                    data->comm, data->pid,
2529                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2530                    data->policy, data->rt_priority);
2531         seq_puts(m, "#    -----------------\n");
2532
2533         if (data->critical_start) {
2534                 seq_puts(m, "#  => started at: ");
2535                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2536                 trace_print_seq(m, &iter->seq);
2537                 seq_puts(m, "\n#  => ended at:   ");
2538                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2539                 trace_print_seq(m, &iter->seq);
2540                 seq_puts(m, "\n#\n");
2541         }
2542
2543         seq_puts(m, "#\n");
2544 }
2545
2546 static void test_cpu_buff_start(struct trace_iterator *iter)
2547 {
2548         struct trace_seq *s = &iter->seq;
2549
2550         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2551                 return;
2552
2553         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2554                 return;
2555
2556         if (cpumask_test_cpu(iter->cpu, iter->started))
2557                 return;
2558
2559         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2560                 return;
2561
2562         cpumask_set_cpu(iter->cpu, iter->started);
2563
2564         /* Don't print started cpu buffer for the first entry of the trace */
2565         if (iter->idx > 1)
2566                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2567                                 iter->cpu);
2568 }
2569
2570 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2571 {
2572         struct trace_seq *s = &iter->seq;
2573         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2574         struct trace_entry *entry;
2575         struct trace_event *event;
2576
2577         entry = iter->ent;
2578
2579         test_cpu_buff_start(iter);
2580
2581         event = ftrace_find_event(entry->type);
2582
2583         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2584                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2585                         if (!trace_print_lat_context(iter))
2586                                 goto partial;
2587                 } else {
2588                         if (!trace_print_context(iter))
2589                                 goto partial;
2590                 }
2591         }
2592
2593         if (event)
2594                 return event->funcs->trace(iter, sym_flags, event);
2595
2596         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2597                 goto partial;
2598
2599         return TRACE_TYPE_HANDLED;
2600 partial:
2601         return TRACE_TYPE_PARTIAL_LINE;
2602 }
2603
2604 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2605 {
2606         struct trace_seq *s = &iter->seq;
2607         struct trace_entry *entry;
2608         struct trace_event *event;
2609
2610         entry = iter->ent;
2611
2612         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2613                 if (!trace_seq_printf(s, "%d %d %llu ",
2614                                       entry->pid, iter->cpu, iter->ts))
2615                         goto partial;
2616         }
2617
2618         event = ftrace_find_event(entry->type);
2619         if (event)
2620                 return event->funcs->raw(iter, 0, event);
2621
2622         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2623                 goto partial;
2624
2625         return TRACE_TYPE_HANDLED;
2626 partial:
2627         return TRACE_TYPE_PARTIAL_LINE;
2628 }
2629
2630 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2631 {
2632         struct trace_seq *s = &iter->seq;
2633         unsigned char newline = '\n';
2634         struct trace_entry *entry;
2635         struct trace_event *event;
2636
2637         entry = iter->ent;
2638
2639         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2640                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2641                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2642                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2643         }
2644
2645         event = ftrace_find_event(entry->type);
2646         if (event) {
2647                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2648                 if (ret != TRACE_TYPE_HANDLED)
2649                         return ret;
2650         }
2651
2652         SEQ_PUT_FIELD_RET(s, newline);
2653
2654         return TRACE_TYPE_HANDLED;
2655 }
2656
2657 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2658 {
2659         struct trace_seq *s = &iter->seq;
2660         struct trace_entry *entry;
2661         struct trace_event *event;
2662
2663         entry = iter->ent;
2664
2665         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2666                 SEQ_PUT_FIELD_RET(s, entry->pid);
2667                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2668                 SEQ_PUT_FIELD_RET(s, iter->ts);
2669         }
2670
2671         event = ftrace_find_event(entry->type);
2672         return event ? event->funcs->binary(iter, 0, event) :
2673                 TRACE_TYPE_HANDLED;
2674 }
2675
2676 int trace_empty(struct trace_iterator *iter)
2677 {
2678         struct ring_buffer_iter *buf_iter;
2679         int cpu;
2680
2681         /* If we are looking at one CPU buffer, only check that one */
2682         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2683                 cpu = iter->cpu_file;
2684                 buf_iter = trace_buffer_iter(iter, cpu);
2685                 if (buf_iter) {
2686                         if (!ring_buffer_iter_empty(buf_iter))
2687                                 return 0;
2688                 } else {
2689                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2690                                 return 0;
2691                 }
2692                 return 1;
2693         }
2694
2695         for_each_tracing_cpu(cpu) {
2696                 buf_iter = trace_buffer_iter(iter, cpu);
2697                 if (buf_iter) {
2698                         if (!ring_buffer_iter_empty(buf_iter))
2699                                 return 0;
2700                 } else {
2701                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2702                                 return 0;
2703                 }
2704         }
2705
2706         return 1;
2707 }
2708
2709 /*  Called with trace_event_read_lock() held. */
2710 enum print_line_t print_trace_line(struct trace_iterator *iter)
2711 {
2712         enum print_line_t ret;
2713
2714         if (iter->lost_events &&
2715             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2716                                  iter->cpu, iter->lost_events))
2717                 return TRACE_TYPE_PARTIAL_LINE;
2718
2719         if (iter->trace && iter->trace->print_line) {
2720                 ret = iter->trace->print_line(iter);
2721                 if (ret != TRACE_TYPE_UNHANDLED)
2722                         return ret;
2723         }
2724
2725         if (iter->ent->type == TRACE_BPUTS &&
2726                         trace_flags & TRACE_ITER_PRINTK &&
2727                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2728                 return trace_print_bputs_msg_only(iter);
2729
2730         if (iter->ent->type == TRACE_BPRINT &&
2731                         trace_flags & TRACE_ITER_PRINTK &&
2732                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2733                 return trace_print_bprintk_msg_only(iter);
2734
2735         if (iter->ent->type == TRACE_PRINT &&
2736                         trace_flags & TRACE_ITER_PRINTK &&
2737                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2738                 return trace_print_printk_msg_only(iter);
2739
2740         if (trace_flags & TRACE_ITER_BIN)
2741                 return print_bin_fmt(iter);
2742
2743         if (trace_flags & TRACE_ITER_HEX)
2744                 return print_hex_fmt(iter);
2745
2746         if (trace_flags & TRACE_ITER_RAW)
2747                 return print_raw_fmt(iter);
2748
2749         return print_trace_fmt(iter);
2750 }
2751
2752 void trace_latency_header(struct seq_file *m)
2753 {
2754         struct trace_iterator *iter = m->private;
2755
2756         /* print nothing if the buffers are empty */
2757         if (trace_empty(iter))
2758                 return;
2759
2760         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2761                 print_trace_header(m, iter);
2762
2763         if (!(trace_flags & TRACE_ITER_VERBOSE))
2764                 print_lat_help_header(m);
2765 }
2766
2767 void trace_default_header(struct seq_file *m)
2768 {
2769         struct trace_iterator *iter = m->private;
2770
2771         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2772                 return;
2773
2774         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2775                 /* print nothing if the buffers are empty */
2776                 if (trace_empty(iter))
2777                         return;
2778                 print_trace_header(m, iter);
2779                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2780                         print_lat_help_header(m);
2781         } else {
2782                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2783                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2784                                 print_func_help_header_irq(iter->trace_buffer, m);
2785                         else
2786                                 print_func_help_header(iter->trace_buffer, m);
2787                 }
2788         }
2789 }
2790
2791 static void test_ftrace_alive(struct seq_file *m)
2792 {
2793         if (!ftrace_is_dead())
2794                 return;
2795         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2796         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2797 }
2798
2799 #ifdef CONFIG_TRACER_MAX_TRACE
2800 static void show_snapshot_main_help(struct seq_file *m)
2801 {
2802         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2803         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2804         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2805         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2806         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2807         seq_printf(m, "#                       is not a '0' or '1')\n");
2808 }
2809
2810 static void show_snapshot_percpu_help(struct seq_file *m)
2811 {
2812         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2813 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2814         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2815         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2816 #else
2817         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2818         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2819 #endif
2820         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2821         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2822         seq_printf(m, "#                       is not a '0' or '1')\n");
2823 }
2824
2825 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2826 {
2827         if (iter->tr->allocated_snapshot)
2828                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2829         else
2830                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2831
2832         seq_printf(m, "# Snapshot commands:\n");
2833         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2834                 show_snapshot_main_help(m);
2835         else
2836                 show_snapshot_percpu_help(m);
2837 }
2838 #else
2839 /* Should never be called */
2840 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2841 #endif
2842
2843 static int s_show(struct seq_file *m, void *v)
2844 {
2845         struct trace_iterator *iter = v;
2846         int ret;
2847
2848         if (iter->ent == NULL) {
2849                 if (iter->tr) {
2850                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2851                         seq_puts(m, "#\n");
2852                         test_ftrace_alive(m);
2853                 }
2854                 if (iter->snapshot && trace_empty(iter))
2855                         print_snapshot_help(m, iter);
2856                 else if (iter->trace && iter->trace->print_header)
2857                         iter->trace->print_header(m);
2858                 else
2859                         trace_default_header(m);
2860
2861         } else if (iter->leftover) {
2862                 /*
2863                  * If we filled the seq_file buffer earlier, we
2864                  * want to just show it now.
2865                  */
2866                 ret = trace_print_seq(m, &iter->seq);
2867
2868                 /* ret should this time be zero, but you never know */
2869                 iter->leftover = ret;
2870
2871         } else {
2872                 print_trace_line(iter);
2873                 ret = trace_print_seq(m, &iter->seq);
2874                 /*
2875                  * If we overflow the seq_file buffer, then it will
2876                  * ask us for this data again at start up.
2877                  * Use that instead.
2878                  *  ret is 0 if seq_file write succeeded.
2879                  *        -1 otherwise.
2880                  */
2881                 iter->leftover = ret;
2882         }
2883
2884         return 0;
2885 }
2886
2887 /*
2888  * Should be used after trace_array_get(), trace_types_lock
2889  * ensures that i_cdev was already initialized.
2890  */
2891 static inline int tracing_get_cpu(struct inode *inode)
2892 {
2893         if (inode->i_cdev) /* See trace_create_cpu_file() */
2894                 return (long)inode->i_cdev - 1;
2895         return RING_BUFFER_ALL_CPUS;
2896 }
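/*
 * Hedged sketch of the matching encode side (done in
 * trace_create_cpu_file(), later in this file): the CPU number is stored
 * biased by one so that a NULL i_cdev still means "all CPUs".
 */
#if 0
	inode->i_cdev = (void *)(long)(cpu + 1);
#endif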
2897
2898 static const struct seq_operations tracer_seq_ops = {
2899         .start          = s_start,
2900         .next           = s_next,
2901         .stop           = s_stop,
2902         .show           = s_show,
2903 };
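/*
 * Rough sketch (simplified; the real loop lives in fs/seq_file.c and
 * also handles buffer overflow and restarts) of how the seq_file core
 * drives the four callbacks above while the "trace" file is read:
 */
#if 0
	void *p;
	loff_t pos = 0;

	p = tracer_seq_ops.start(m, &pos);
	while (p && !IS_ERR(p)) {
		if (tracer_seq_ops.show(m, p))
			break;
		p = tracer_seq_ops.next(m, p, &pos);
	}
	tracer_seq_ops.stop(m, p);
#endif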
2904
2905 static struct trace_iterator *
2906 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2907 {
2908         struct trace_array *tr = inode->i_private;
2909         struct trace_iterator *iter;
2910         int cpu;
2911
2912         if (tracing_disabled)
2913                 return ERR_PTR(-ENODEV);
2914
2915         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2916         if (!iter)
2917                 return ERR_PTR(-ENOMEM);
2918
2919         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2920                                     GFP_KERNEL);
2921         if (!iter->buffer_iter)
2922                 goto release;
2923
2924         /*
2925          * We make a copy of the current tracer to avoid concurrent
2926          * changes on it while we are reading.
2927          */
2928         mutex_lock(&trace_types_lock);
2929         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2930         if (!iter->trace)
2931                 goto fail;
2932
2933         *iter->trace = *tr->current_trace;
2934
2935         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2936                 goto fail;
2937
2938         iter->tr = tr;
2939
2940 #ifdef CONFIG_TRACER_MAX_TRACE
2941         /* Currently only the top directory has a snapshot */
2942         if (tr->current_trace->print_max || snapshot)
2943                 iter->trace_buffer = &tr->max_buffer;
2944         else
2945 #endif
2946                 iter->trace_buffer = &tr->trace_buffer;
2947         iter->snapshot = snapshot;
2948         iter->pos = -1;
2949         iter->cpu_file = tracing_get_cpu(inode);
2950         mutex_init(&iter->mutex);
2951
2952         /* Notify the tracer early; before we stop tracing. */
2953         if (iter->trace && iter->trace->open)
2954                 iter->trace->open(iter);
2955
2956         /* Annotate start of buffers if we had overruns */
2957         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2958                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2959
2960         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2961         if (trace_clocks[tr->clock_id].in_ns)
2962                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2963
2964         /* stop the trace while dumping if we are not opening "snapshot" */
2965         if (!iter->snapshot)
2966                 tracing_stop_tr(tr);
2967
2968         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2969                 for_each_tracing_cpu(cpu) {
2970                         iter->buffer_iter[cpu] =
2971                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2972                 }
2973                 ring_buffer_read_prepare_sync();
2974                 for_each_tracing_cpu(cpu) {
2975                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2976                         tracing_iter_reset(iter, cpu);
2977                 }
2978         } else {
2979                 cpu = iter->cpu_file;
2980                 iter->buffer_iter[cpu] =
2981                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2982                 ring_buffer_read_prepare_sync();
2983                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2984                 tracing_iter_reset(iter, cpu);
2985         }
2986
2987         mutex_unlock(&trace_types_lock);
2988
2989         return iter;
2990
2991  fail:
2992         mutex_unlock(&trace_types_lock);
2993         kfree(iter->trace);
2994         kfree(iter->buffer_iter);
2995 release:
2996         seq_release_private(inode, file);
2997         return ERR_PTR(-ENOMEM);
2998 }
2999
3000 int tracing_open_generic(struct inode *inode, struct file *filp)
3001 {
3002         if (tracing_disabled)
3003                 return -ENODEV;
3004
3005         filp->private_data = inode->i_private;
3006         return 0;
3007 }
3008
3009 bool tracing_is_disabled(void)
3010 {
3011         return (tracing_disabled) ? true : false;
3012 }
3013
3014 /*
3015  * Open and update trace_array ref count.
3016  * Must have the current trace_array passed to it.
3017  */
3018 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3019 {
3020         struct trace_array *tr = inode->i_private;
3021
3022         if (tracing_disabled)
3023                 return -ENODEV;
3024
3025         if (trace_array_get(tr) < 0)
3026                 return -ENODEV;
3027
3028         filp->private_data = inode->i_private;
3029
3030         return 0;
3031 }
3032
3033 static int tracing_release(struct inode *inode, struct file *file)
3034 {
3035         struct trace_array *tr = inode->i_private;
3036         struct seq_file *m = file->private_data;
3037         struct trace_iterator *iter;
3038         int cpu;
3039
3040         if (!(file->f_mode & FMODE_READ)) {
3041                 trace_array_put(tr);
3042                 return 0;
3043         }
3044
3045         /* Writes do not use seq_file */
3046         iter = m->private;
3047         mutex_lock(&trace_types_lock);
3048
3049         for_each_tracing_cpu(cpu) {
3050                 if (iter->buffer_iter[cpu])
3051                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3052         }
3053
3054         if (iter->trace && iter->trace->close)
3055                 iter->trace->close(iter);
3056
3057         if (!iter->snapshot)
3058                 /* reenable tracing if it was previously enabled */
3059                 tracing_start_tr(tr);
3060
3061         __trace_array_put(tr);
3062
3063         mutex_unlock(&trace_types_lock);
3064
3065         mutex_destroy(&iter->mutex);
3066         free_cpumask_var(iter->started);
3067         kfree(iter->trace);
3068         kfree(iter->buffer_iter);
3069         seq_release_private(inode, file);
3070
3071         return 0;
3072 }
3073
3074 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3075 {
3076         struct trace_array *tr = inode->i_private;
3077
3078         trace_array_put(tr);
3079         return 0;
3080 }
3081
3082 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3083 {
3084         struct trace_array *tr = inode->i_private;
3085
3086         trace_array_put(tr);
3087
3088         return single_release(inode, file);
3089 }
3090
3091 static int tracing_open(struct inode *inode, struct file *file)
3092 {
3093         struct trace_array *tr = inode->i_private;
3094         struct trace_iterator *iter;
3095         int ret = 0;
3096
3097         if (trace_array_get(tr) < 0)
3098                 return -ENODEV;
3099
3100         /* If this file was open for write, then erase contents */
3101         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3102                 int cpu = tracing_get_cpu(inode);
3103
3104                 if (cpu == RING_BUFFER_ALL_CPUS)
3105                         tracing_reset_online_cpus(&tr->trace_buffer);
3106                 else
3107                         tracing_reset(&tr->trace_buffer, cpu);
3108         }
3109
3110         if (file->f_mode & FMODE_READ) {
3111                 iter = __tracing_open(inode, file, false);
3112                 if (IS_ERR(iter))
3113                         ret = PTR_ERR(iter);
3114                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3115                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3116         }
3117
3118         if (ret < 0)
3119                 trace_array_put(tr);
3120
3121         return ret;
3122 }
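/*
 * Userspace view (illustrative; the debugfs mount point is an
 * assumption): opening "trace" write-only with O_TRUNC clears the ring
 * buffer, which is what a plain shell redirect to the file does.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/trace",
		      O_WRONLY | O_TRUNC);

	if (fd >= 0)
		close(fd);	/* the open itself erased the buffer */
	return 0;
}
#endif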
3123
3124 static void *
3125 t_next(struct seq_file *m, void *v, loff_t *pos)
3126 {
3127         struct tracer *t = v;
3128
3129         (*pos)++;
3130
3131         if (t)
3132                 t = t->next;
3133
3134         return t;
3135 }
3136
3137 static void *t_start(struct seq_file *m, loff_t *pos)
3138 {
3139         struct tracer *t;
3140         loff_t l = 0;
3141
3142         mutex_lock(&trace_types_lock);
3143         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3144                 ;
3145
3146         return t;
3147 }
3148
3149 static void t_stop(struct seq_file *m, void *p)
3150 {
3151         mutex_unlock(&trace_types_lock);
3152 }
3153
3154 static int t_show(struct seq_file *m, void *v)
3155 {
3156         struct tracer *t = v;
3157
3158         if (!t)
3159                 return 0;
3160
3161         seq_printf(m, "%s", t->name);
3162         if (t->next)
3163                 seq_putc(m, ' ');
3164         else
3165                 seq_putc(m, '\n');
3166
3167         return 0;
3168 }
3169
3170 static const struct seq_operations show_traces_seq_ops = {
3171         .start          = t_start,
3172         .next           = t_next,
3173         .stop           = t_stop,
3174         .show           = t_show,
3175 };
3176
3177 static int show_traces_open(struct inode *inode, struct file *file)
3178 {
3179         if (tracing_disabled)
3180                 return -ENODEV;
3181
3182         return seq_open(file, &show_traces_seq_ops);
3183 }
3184
3185 static ssize_t
3186 tracing_write_stub(struct file *filp, const char __user *ubuf,
3187                    size_t count, loff_t *ppos)
3188 {
3189         return count;
3190 }
3191
3192 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3193 {
3194         int ret;
3195
3196         if (file->f_mode & FMODE_READ)
3197                 ret = seq_lseek(file, offset, whence);
3198         else
3199                 file->f_pos = ret = 0;
3200
3201         return ret;
3202 }
3203
3204 static const struct file_operations tracing_fops = {
3205         .open           = tracing_open,
3206         .read           = seq_read,
3207         .write          = tracing_write_stub,
3208         .llseek         = tracing_lseek,
3209         .release        = tracing_release,
3210 };
3211
3212 static const struct file_operations show_traces_fops = {
3213         .open           = show_traces_open,
3214         .read           = seq_read,
3215         .release        = seq_release,
3216         .llseek         = seq_lseek,
3217 };
3218
3219 /*
3220  * The tracer itself will not take this lock, but still we want
3221  * to provide a consistent cpumask to user-space:
3222  */
3223 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3224
3225 /*
3226  * Temporary storage for the character representation of the
3227  * CPU bitmask (and one more byte for the newline):
3228  */
3229 static char mask_str[NR_CPUS + 1];
3230
3231 static ssize_t
3232 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3233                      size_t count, loff_t *ppos)
3234 {
3235         struct trace_array *tr = file_inode(filp)->i_private;
3236         int len;
3237
3238         mutex_lock(&tracing_cpumask_update_lock);
3239
3240         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3241         if (count - len < 2) {
3242                 count = -EINVAL;
3243                 goto out_err;
3244         }
3245         len += sprintf(mask_str + len, "\n");
3246         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3247
3248 out_err:
3249         mutex_unlock(&tracing_cpumask_update_lock);
3250
3251         return count;
3252 }
3253
3254 static ssize_t
3255 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3256                       size_t count, loff_t *ppos)
3257 {
3258         struct trace_array *tr = file_inode(filp)->i_private;
3259         cpumask_var_t tracing_cpumask_new;
3260         int err, cpu;
3261
3262         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3263                 return -ENOMEM;
3264
3265         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3266         if (err)
3267                 goto err_unlock;
3268
3269         mutex_lock(&tracing_cpumask_update_lock);
3270
3271         local_irq_disable();
3272         arch_spin_lock(&ftrace_max_lock);
3273         for_each_tracing_cpu(cpu) {
3274                 /*
3275                  * Increase/decrease the disabled counter if we are
3276                  * about to flip a bit in the cpumask:
3277                  */
3278                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3279                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3280                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3281                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3282                 }
3283                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3284                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3285                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3286                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3287                 }
3288         }
3289         arch_spin_unlock(&ftrace_max_lock);
3290         local_irq_enable();
3291
3292         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3293
3294         mutex_unlock(&tracing_cpumask_update_lock);
3295         free_cpumask_var(tracing_cpumask_new);
3296
3297         return count;
3298
3299 err_unlock:
3300         free_cpumask_var(tracing_cpumask_new);
3301
3302         return err;
3303 }
3304
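/*
 * The "tracing_cpumask" file takes a hex CPU mask, as parsed by
 * cpumask_parse_user() above. For example (illustrative, 4-CPU box):
 *
 *   # echo 3 > tracing_cpumask     (trace only CPUs 0 and 1)
 *   # echo f > tracing_cpumask     (trace all four CPUs again)
 *
 * Reading the file prints the current mask back.
 */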
3305 static const struct file_operations tracing_cpumask_fops = {
3306         .open           = tracing_open_generic_tr,
3307         .read           = tracing_cpumask_read,
3308         .write          = tracing_cpumask_write,
3309         .release        = tracing_release_generic_tr,
3310         .llseek         = generic_file_llseek,
3311 };
3312
3313 static int tracing_trace_options_show(struct seq_file *m, void *v)
3314 {
3315         struct tracer_opt *trace_opts;
3316         struct trace_array *tr = m->private;
3317         u32 tracer_flags;
3318         int i;
3319
3320         mutex_lock(&trace_types_lock);
3321         tracer_flags = tr->current_trace->flags->val;
3322         trace_opts = tr->current_trace->flags->opts;
3323
3324         for (i = 0; trace_options[i]; i++) {
3325                 if (trace_flags & (1 << i))
3326                         seq_printf(m, "%s\n", trace_options[i]);
3327                 else
3328                         seq_printf(m, "no%s\n", trace_options[i]);
3329         }
3330
3331         for (i = 0; trace_opts[i].name; i++) {
3332                 if (tracer_flags & trace_opts[i].bit)
3333                         seq_printf(m, "%s\n", trace_opts[i].name);
3334                 else
3335                         seq_printf(m, "no%s\n", trace_opts[i].name);
3336         }
3337         mutex_unlock(&trace_types_lock);
3338
3339         return 0;
3340 }
3341
3342 static int __set_tracer_option(struct tracer *trace,
3343                                struct tracer_flags *tracer_flags,
3344                                struct tracer_opt *opts, int neg)
3345 {
3346         int ret;
3347
3348         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3349         if (ret)
3350                 return ret;
3351
3352         if (neg)
3353                 tracer_flags->val &= ~opts->bit;
3354         else
3355                 tracer_flags->val |= opts->bit;
3356         return 0;
3357 }
3358
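/*
 * Tracer-specific options are the flags a tracer registers in its
 * struct tracer_flags; they show up in "trace_options" after the global
 * flags and are toggled the same way. Illustrative example (the exact
 * option names depend on the current tracer):
 *
 *   # echo func_stack_trace > trace_options
 *   # echo nofunc_stack_trace > trace_options
 */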
3359 /* Try to assign a tracer specific option */
3360 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3361 {
3362         struct tracer_flags *tracer_flags = trace->flags;
3363         struct tracer_opt *opts = NULL;
3364         int i;
3365
3366         for (i = 0; tracer_flags->opts[i].name; i++) {
3367                 opts = &tracer_flags->opts[i];
3368
3369                 if (strcmp(cmp, opts->name) == 0)
3370                         return __set_tracer_option(trace, trace->flags,
3371                                                    opts, neg);
3372         }
3373
3374         return -EINVAL;
3375 }
3376
3377 /* Some tracers require overwrite to stay enabled */
3378 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3379 {
3380         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3381                 return -1;
3382
3383         return 0;
3384 }
3385
3386 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3387 {
3388         /* do nothing if flag is already set */
3389         if (!!(trace_flags & mask) == !!enabled)
3390                 return 0;
3391
3392         /* Give the tracer a chance to approve the change */
3393         if (tr->current_trace->flag_changed)
3394                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3395                         return -EINVAL;
3396
3397         if (enabled)
3398                 trace_flags |= mask;
3399         else
3400                 trace_flags &= ~mask;
3401
3402         if (mask == TRACE_ITER_RECORD_CMD)
3403                 trace_event_enable_cmd_record(enabled);
3404
3405         if (mask == TRACE_ITER_OVERWRITE) {
3406                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3407 #ifdef CONFIG_TRACER_MAX_TRACE
3408                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3409 #endif
3410         }
3411
3412         if (mask == TRACE_ITER_PRINTK)
3413                 trace_printk_start_stop_comm(enabled);
3414
3415         return 0;
3416 }
3417
3418 static int trace_set_options(struct trace_array *tr, char *option)
3419 {
3420         char *cmp;
3421         int neg = 0;
3422         int ret = -ENODEV;
3423         int i;
3424
3425         cmp = strstrip(option);
3426
3427         if (strncmp(cmp, "no", 2) == 0) {
3428                 neg = 1;
3429                 cmp += 2;
3430         }
3431
3432         mutex_lock(&trace_types_lock);
3433
3434         for (i = 0; trace_options[i]; i++) {
3435                 if (strcmp(cmp, trace_options[i]) == 0) {
3436                         ret = set_tracer_flag(tr, 1 << i, !neg);
3437                         break;
3438                 }
3439         }
3440
3441         /* If no option could be set, test the specific tracer options */
3442         if (!trace_options[i])
3443                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3444
3445         mutex_unlock(&trace_types_lock);
3446
3447         return ret;
3448 }
3449
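/*
 * Example usage of the parsing above, via the "trace_options" file
 * (names are illustrative; the full list comes from trace_options[] and
 * the current tracer's flags):
 *
 *   # echo print-parent > trace_options      (set a global flag)
 *   # echo nooverwrite > trace_options       (clear a global flag)
 */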
3450 static ssize_t
3451 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3452                         size_t cnt, loff_t *ppos)
3453 {
3454         struct seq_file *m = filp->private_data;
3455         struct trace_array *tr = m->private;
3456         char buf[64];
3457         int ret;
3458
3459         if (cnt >= sizeof(buf))
3460                 return -EINVAL;
3461
3462         if (copy_from_user(&buf, ubuf, cnt))
3463                 return -EFAULT;
3464
3465         buf[cnt] = 0;
3466
3467         ret = trace_set_options(tr, buf);
3468         if (ret < 0)
3469                 return ret;
3470
3471         *ppos += cnt;
3472
3473         return cnt;
3474 }
3475
3476 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3477 {
3478         struct trace_array *tr = inode->i_private;
3479         int ret;
3480
3481         if (tracing_disabled)
3482                 return -ENODEV;
3483
3484         if (trace_array_get(tr) < 0)
3485                 return -ENODEV;
3486
3487         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3488         if (ret < 0)
3489                 trace_array_put(tr);
3490
3491         return ret;
3492 }
3493
3494 static const struct file_operations tracing_iter_fops = {
3495         .open           = tracing_trace_options_open,
3496         .read           = seq_read,
3497         .llseek         = seq_lseek,
3498         .release        = tracing_single_release_tr,
3499         .write          = tracing_trace_options_write,
3500 };
3501
3502 static const char readme_msg[] =
3503         "tracing mini-HOWTO:\n\n"
3504         "# echo 0 > tracing_on : quick way to disable tracing\n"
3505         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3506         " Important files:\n"
3507         "  trace\t\t\t- The static contents of the buffer\n"
3508         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3509         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3510         "  current_tracer\t- function and latency tracers\n"
3511         "  available_tracers\t- list of configured tracers for current_tracer\n"
3512         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3513         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3514         "  trace_clock\t\t- change the clock used to order events\n"
3515         "       local:   Per cpu clock but may not be synced across CPUs\n"
3516         "      global:   Synced across CPUs but slows tracing down.\n"
3517         "     counter:   Not a clock, but just an increment\n"
3518         "      uptime:   Jiffy counter from time of boot\n"
3519         "        perf:   Same clock that perf events use\n"
3520 #ifdef CONFIG_X86_64
3521         "     x86-tsc:   TSC cycle counter\n"
3522 #endif
3523         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3524         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3525         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3526         "\t\t\t  Remove sub-buffer with rmdir\n"
3527         "  trace_options\t\t- Set format or modify how tracing happens\n"
3528         "\t\t\t  Disable an option by prefixing the option\n"
3529         "\t\t\t  name with 'no'\n"
3530 #ifdef CONFIG_DYNAMIC_FTRACE
3531         "\n  available_filter_functions - list of functions that can be filtered on\n"
3532         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3533         "\t\t\t  functions\n"
3534         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3535         "\t     modules: Can select a group via module\n"
3536         "\t      Format: :mod:<module-name>\n"
3537         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3538         "\t    triggers: a command to perform when function is hit\n"
3539         "\t      Format: <function>:<trigger>[:count]\n"
3540         "\t     trigger: traceon, traceoff\n"
3541         "\t\t      enable_event:<system>:<event>\n"
3542         "\t\t      disable_event:<system>:<event>\n"
3543 #ifdef CONFIG_STACKTRACE
3544         "\t\t      stacktrace\n"
3545 #endif
3546 #ifdef CONFIG_TRACER_SNAPSHOT
3547         "\t\t      snapshot\n"
3548 #endif
3549         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3550         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3551         "\t     The first one will disable tracing every time do_fault is hit\n"
3552         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3553         "\t       The first time do_trap is hit and it disables tracing, the\n"
3554         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3555         "\t       the counter will not decrement. It only decrements when the\n"
3556         "\t       trigger did work\n"
3557         "\t     To remove trigger without count:\n"
3558         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3559         "\t     To remove trigger with a count:\n"
3560         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3561         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3562         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3563         "\t    modules: Can select a group via module command :mod:\n"
3564         "\t    Does not accept triggers\n"
3565 #endif /* CONFIG_DYNAMIC_FTRACE */
3566 #ifdef CONFIG_FUNCTION_TRACER
3567         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3568         "\t\t    (function)\n"
3569 #endif
3570 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3571         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3572         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3573 #endif
3574 #ifdef CONFIG_TRACER_SNAPSHOT
3575         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3576         "\t\t\t  snapshot buffer. Read the contents for more\n"
3577         "\t\t\t  information\n"
3578 #endif
3579 #ifdef CONFIG_STACK_TRACER
3580         "  stack_trace\t\t- Shows the max stack trace when active\n"
3581         "  stack_max_size\t- Shows current max stack size that was traced\n"
3582         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3583         "\t\t\t  new trace)\n"
3584 #ifdef CONFIG_DYNAMIC_FTRACE
3585         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3586         "\t\t\t  traces\n"
3587 #endif
3588 #endif /* CONFIG_STACK_TRACER */
3589         "  events/\t\t- Directory containing all trace event subsystems:\n"
3590         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3591         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3592         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3593         "\t\t\t  events\n"
3594         "      filter\t\t- If set, only events passing filter are traced\n"
3595         "  events/<system>/<event>/\t- Directory containing control files for\n"
3596         "\t\t\t  <event>:\n"
3597         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3598         "      filter\t\t- If set, only events passing filter are traced\n"
3599         "      trigger\t\t- If set, a command to perform when event is hit\n"
3600         "\t    Format: <trigger>[:count][if <filter>]\n"
3601         "\t   trigger: traceon, traceoff\n"
3602         "\t            enable_event:<system>:<event>\n"
3603         "\t            disable_event:<system>:<event>\n"
3604 #ifdef CONFIG_STACKTRACE
3605         "\t\t    stacktrace\n"
3606 #endif
3607 #ifdef CONFIG_TRACER_SNAPSHOT
3608         "\t\t    snapshot\n"
3609 #endif
3610         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3611         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3612         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3613         "\t                  events/block/block_unplug/trigger\n"
3614         "\t   The first disables tracing every time block_unplug is hit.\n"
3615         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3616         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3617         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3618         "\t   Like function triggers, the counter is only decremented if it\n"
3619         "\t    enabled or disabled tracing.\n"
3620         "\t   To remove a trigger without a count:\n"
3621         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3622         "\t   To remove a trigger with a count:\n"
3623         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3624         "\t   Filters can be ignored when removing a trigger.\n"
3625 ;
3626
3627 static ssize_t
3628 tracing_readme_read(struct file *filp, char __user *ubuf,
3629                        size_t cnt, loff_t *ppos)
3630 {
3631         return simple_read_from_buffer(ubuf, cnt, ppos,
3632                                         readme_msg, strlen(readme_msg));
3633 }
3634
3635 static const struct file_operations tracing_readme_fops = {
3636         .open           = tracing_open_generic,
3637         .read           = tracing_readme_read,
3638         .llseek         = generic_file_llseek,
3639 };
3640
3641 static ssize_t
3642 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3643                                 size_t cnt, loff_t *ppos)
3644 {
3645         char *buf_comm;
3646         char *file_buf;
3647         char *buf;
3648         int len = 0;
3649         int pid;
3650         int i;
3651
3652         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3653         if (!file_buf)
3654                 return -ENOMEM;
3655
3656         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3657         if (!buf_comm) {
3658                 kfree(file_buf);
3659                 return -ENOMEM;
3660         }
3661
3662         buf = file_buf;
3663
3664         for (i = 0; i < SAVED_CMDLINES; i++) {
3665                 int r;
3666
3667                 pid = map_cmdline_to_pid[i];
3668                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3669                         continue;
3670
3671                 trace_find_cmdline(pid, buf_comm);
3672                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3673                 buf += r;
3674                 len += r;
3675         }
3676
3677         len = simple_read_from_buffer(ubuf, cnt, ppos,
3678                                       file_buf, len);
3679
3680         kfree(file_buf);
3681         kfree(buf_comm);
3682
3683         return len;
3684 }
3685
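/*
 * Reading "saved_cmdlines" returns one "<pid> <comm>" pair per line, as
 * built by the sprintf() above, e.g. a line such as "1234 bash"
 * (values illustrative).
 */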
3686 static const struct file_operations tracing_saved_cmdlines_fops = {
3687         .open           = tracing_open_generic,
3688         .read           = tracing_saved_cmdlines_read,
3689         .llseek         = generic_file_llseek,
3690 };
3691
3692 static ssize_t
3693 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3694                        size_t cnt, loff_t *ppos)
3695 {
3696         struct trace_array *tr = filp->private_data;
3697         char buf[MAX_TRACER_SIZE+2];
3698         int r;
3699
3700         mutex_lock(&trace_types_lock);
3701         r = sprintf(buf, "%s\n", tr->current_trace->name);
3702         mutex_unlock(&trace_types_lock);
3703
3704         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3705 }
3706
3707 int tracer_init(struct tracer *t, struct trace_array *tr)
3708 {
3709         tracing_reset_online_cpus(&tr->trace_buffer);
3710         return t->init(tr);
3711 }
3712
3713 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3714 {
3715         int cpu;
3716
3717         for_each_tracing_cpu(cpu)
3718                 per_cpu_ptr(buf->data, cpu)->entries = val;
3719 }
3720
3721 #ifdef CONFIG_TRACER_MAX_TRACE
3722 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3723 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3724                                         struct trace_buffer *size_buf, int cpu_id)
3725 {
3726         int cpu, ret = 0;
3727
3728         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3729                 for_each_tracing_cpu(cpu) {
3730                         ret = ring_buffer_resize(trace_buf->buffer,
3731                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3732                         if (ret < 0)
3733                                 break;
3734                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3735                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3736                 }
3737         } else {
3738                 ret = ring_buffer_resize(trace_buf->buffer,
3739                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3740                 if (ret == 0)
3741                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3742                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3743         }
3744
3745         return ret;
3746 }
3747 #endif /* CONFIG_TRACER_MAX_TRACE */
3748
3749 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3750                                         unsigned long size, int cpu)
3751 {
3752         int ret;
3753
3754         /*
3755          * If kernel or user changes the size of the ring buffer
3756          * we use the size that was given, and we can forget about
3757          * expanding it later.
3758          */
3759         ring_buffer_expanded = true;
3760
3761         /* May be called before buffers are initialized */
3762         if (!tr->trace_buffer.buffer)
3763                 return 0;
3764
3765         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3766         if (ret < 0)
3767                 return ret;
3768
3769 #ifdef CONFIG_TRACER_MAX_TRACE
3770         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3771             !tr->current_trace->use_max_tr)
3772                 goto out;
3773
3774         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3775         if (ret < 0) {
3776                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3777                                                      &tr->trace_buffer, cpu);
3778                 if (r < 0) {
3779                         /*
3780                          * AARGH! We are left with different
3781                          * size max buffer!!!!
3782                          * The max buffer is our "snapshot" buffer.
3783                          * When a tracer needs a snapshot (one of the
3784                          * latency tracers), it swaps the max buffer
3785                          * with the saved snapshot. We succeeded in
3786                          * updating the size of the main buffer, but failed to
3787                          * update the size of the max buffer. But when we tried
3788                          * to reset the main buffer to the original size, we
3789                          * failed there too. This is very unlikely to
3790                          * happen, but if it does, warn and kill all
3791                          * tracing.
3792                          */
3793                         WARN_ON(1);
3794                         tracing_disabled = 1;
3795                 }
3796                 return ret;
3797         }
3798
3799         if (cpu == RING_BUFFER_ALL_CPUS)
3800                 set_buffer_entries(&tr->max_buffer, size);
3801         else
3802                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3803
3804  out:
3805 #endif /* CONFIG_TRACER_MAX_TRACE */
3806
3807         if (cpu == RING_BUFFER_ALL_CPUS)
3808                 set_buffer_entries(&tr->trace_buffer, size);
3809         else
3810                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3811
3812         return ret;
3813 }
3814
3815 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3816                                           unsigned long size, int cpu_id)
3817 {
3818         int ret = size;
3819
3820         mutex_lock(&trace_types_lock);
3821
3822         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3823                 /* make sure this CPU is enabled in the mask */
3824                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3825                         ret = -EINVAL;
3826                         goto out;
3827                 }
3828         }
3829
3830         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3831         if (ret < 0)
3832                 ret = -ENOMEM;
3833
3834 out:
3835         mutex_unlock(&trace_types_lock);
3836
3837         return ret;
3838 }
3839
3840
3841 /**
3842  * tracing_update_buffers - used by tracing facility to expand ring buffers
3843  *
3844  * To save memory when tracing is never used on a system that has it
3845  * configured in, the ring buffers are set to a minimum size. Once a
3846  * user starts to use the tracing facility, they need to grow to
3847  * their default size.
3848  *
3849  * This function is to be called when a tracer is about to be used.
3850  */
3851 int tracing_update_buffers(void)
3852 {
3853         int ret = 0;
3854
3855         mutex_lock(&trace_types_lock);
3856         if (!ring_buffer_expanded)
3857                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3858                                                 RING_BUFFER_ALL_CPUS);
3859         mutex_unlock(&trace_types_lock);
3860
3861         return ret;
3862 }
3863
3864 struct trace_option_dentry;
3865
3866 static struct trace_option_dentry *
3867 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3868
3869 static void
3870 destroy_trace_option_files(struct trace_option_dentry *topts);
3871
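/*
 * tracing_set_tracer() backs writes to the "current_tracer" file, e.g.:
 *
 *   # echo function > current_tracer
 *   # echo nop > current_tracer          (stop using a tracer)
 *
 * The accepted names are exactly those listed in "available_tracers".
 */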
3872 static int tracing_set_tracer(const char *buf)
3873 {
3874         static struct trace_option_dentry *topts;
3875         struct trace_array *tr = &global_trace;
3876         struct tracer *t;
3877 #ifdef CONFIG_TRACER_MAX_TRACE
3878         bool had_max_tr;
3879 #endif
3880         int ret = 0;
3881
3882         mutex_lock(&trace_types_lock);
3883
3884         if (!ring_buffer_expanded) {
3885                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3886                                                 RING_BUFFER_ALL_CPUS);
3887                 if (ret < 0)
3888                         goto out;
3889                 ret = 0;
3890         }
3891
3892         for (t = trace_types; t; t = t->next) {
3893                 if (strcmp(t->name, buf) == 0)
3894                         break;
3895         }
3896         if (!t) {
3897                 ret = -EINVAL;
3898                 goto out;
3899         }
3900         if (t == tr->current_trace)
3901                 goto out;
3902
3903         trace_branch_disable();
3904
3905         tr->current_trace->enabled = false;
3906
3907         if (tr->current_trace->reset)
3908                 tr->current_trace->reset(tr);
3909
3910         /* Current trace needs to be nop_trace before synchronize_sched */
3911         tr->current_trace = &nop_trace;
3912
3913 #ifdef CONFIG_TRACER_MAX_TRACE
3914         had_max_tr = tr->allocated_snapshot;
3915
3916         if (had_max_tr && !t->use_max_tr) {
3917                 /*
3918                  * We need to make sure that the update_max_tr sees that
3919                  * current_trace changed to nop_trace to keep it from
3920                  * swapping the buffers after we resize it.
3921                  * update_max_tr() is called with interrupts disabled,
3922                  * so a synchronize_sched() is sufficient.
3923                  */
3924                 synchronize_sched();
3925                 free_snapshot(tr);
3926         }
3927 #endif
3928         destroy_trace_option_files(topts);
3929
3930         topts = create_trace_option_files(tr, t);
3931
3932 #ifdef CONFIG_TRACER_MAX_TRACE
3933         if (t->use_max_tr && !had_max_tr) {
3934                 ret = alloc_snapshot(tr);
3935                 if (ret < 0)
3936                         goto out;
3937         }
3938 #endif
3939
3940         if (t->init) {
3941                 ret = tracer_init(t, tr);
3942                 if (ret)
3943                         goto out;
3944         }
3945
3946         tr->current_trace = t;
3947         tr->current_trace->enabled = true;
3948         trace_branch_enable(tr);
3949  out:
3950         mutex_unlock(&trace_types_lock);
3951
3952         return ret;
3953 }
3954
3955 static ssize_t
3956 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3957                         size_t cnt, loff_t *ppos)
3958 {
3959         char buf[MAX_TRACER_SIZE+1];
3960         int i;
3961         size_t ret;
3962         int err;
3963
3964         ret = cnt;
3965
3966         if (cnt > MAX_TRACER_SIZE)
3967                 cnt = MAX_TRACER_SIZE;
3968
3969         if (copy_from_user(&buf, ubuf, cnt))
3970                 return -EFAULT;
3971
3972         buf[cnt] = 0;
3973
3974         /* strip trailing whitespace. */
3975         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3976                 buf[i] = 0;
3977
3978         err = tracing_set_tracer(buf);
3979         if (err)
3980                 return err;
3981
3982         *ppos += ret;
3983
3984         return ret;
3985 }
3986
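/*
 * Note on units for the max-latency file: reads print microseconds
 * (via nsecs_to_usecs() below), while writes are taken in microseconds
 * and stored internally as nanoseconds (val * 1000).
 */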
3987 static ssize_t
3988 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3989                      size_t cnt, loff_t *ppos)
3990 {
3991         unsigned long *ptr = filp->private_data;
3992         char buf[64];
3993         int r;
3994
3995         r = snprintf(buf, sizeof(buf), "%ld\n",
3996                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3997         if (r > sizeof(buf))
3998                 r = sizeof(buf);
3999         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4000 }
4001
4002 static ssize_t
4003 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4004                       size_t cnt, loff_t *ppos)
4005 {
4006         unsigned long *ptr = filp->private_data;
4007         unsigned long val;
4008         int ret;
4009
4010         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4011         if (ret)
4012                 return ret;
4013
4014         *ptr = val * 1000;
4015
4016         return cnt;
4017 }
4018
4019 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4020 {
4021         struct trace_array *tr = inode->i_private;
4022         struct trace_iterator *iter;
4023         int ret = 0;
4024
4025         if (tracing_disabled)
4026                 return -ENODEV;
4027
4028         if (trace_array_get(tr) < 0)
4029                 return -ENODEV;
4030
4031         mutex_lock(&trace_types_lock);
4032
4033         /* create a buffer to store the information to pass to userspace */
4034         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4035         if (!iter) {
4036                 ret = -ENOMEM;
4037                 __trace_array_put(tr);
4038                 goto out;
4039         }
4040
4041         /*
4042          * We make a copy of the current tracer to avoid concurrent
4043          * changes on it while we are reading.
4044          */
4045         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4046         if (!iter->trace) {
4047                 ret = -ENOMEM;
4048                 goto fail;
4049         }
4050         *iter->trace = *tr->current_trace;
4051
4052         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4053                 ret = -ENOMEM;
4054                 goto fail;
4055         }
4056
4057         /* trace pipe does not show start of buffer */
4058         cpumask_setall(iter->started);
4059
4060         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4061                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4062
4063         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4064         if (trace_clocks[tr->clock_id].in_ns)
4065                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4066
4067         iter->tr = tr;
4068         iter->trace_buffer = &tr->trace_buffer;
4069         iter->cpu_file = tracing_get_cpu(inode);
4070         mutex_init(&iter->mutex);
4071         filp->private_data = iter;
4072
4073         if (iter->trace->pipe_open)
4074                 iter->trace->pipe_open(iter);
4075
4076         nonseekable_open(inode, filp);
4077 out:
4078         mutex_unlock(&trace_types_lock);
4079         return ret;
4080
4081 fail:
4082         kfree(iter->trace);
4083         kfree(iter);
4084         __trace_array_put(tr);
4085         mutex_unlock(&trace_types_lock);
4086         return ret;
4087 }
4088
4089 static int tracing_release_pipe(struct inode *inode, struct file *file)
4090 {
4091         struct trace_iterator *iter = file->private_data;
4092         struct trace_array *tr = inode->i_private;
4093
4094         mutex_lock(&trace_types_lock);
4095
4096         if (iter->trace->pipe_close)
4097                 iter->trace->pipe_close(iter);
4098
4099         mutex_unlock(&trace_types_lock);
4100
4101         free_cpumask_var(iter->started);
4102         mutex_destroy(&iter->mutex);
4103         kfree(iter->trace);
4104         kfree(iter);
4105
4106         trace_array_put(tr);
4107
4108         return 0;
4109 }
4110
4111 static unsigned int
4112 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4113 {
4114         /* Iterators are static, they should be filled or empty */
4115         if (trace_buffer_iter(iter, iter->cpu_file))
4116                 return POLLIN | POLLRDNORM;
4117
4118         if (trace_flags & TRACE_ITER_BLOCK)
4119                 /*
4120                  * Always select as readable when in blocking mode
4121                  */
4122                 return POLLIN | POLLRDNORM;
4123         else
4124                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4125                                              filp, poll_table);
4126 }
4127
4128 static unsigned int
4129 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4130 {
4131         struct trace_iterator *iter = filp->private_data;
4132
4133         return trace_poll(iter, filp, poll_table);
4134 }
4135
4136 /*
4137  * This is a makeshift waitqueue.
4138  * A tracer might use this callback in some rare cases:
4139  *
4140  *  1) the current tracer might hold the runqueue lock when it wakes up
4141  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4142  *  2) the function tracers trace all functions, and we don't want
4143  *     the overhead of calling wake_up and friends
4144  *     (and tracing them too)
4145  *
4146  *     Anyway, this is really a very primitive wakeup.
4147  */
4148 void poll_wait_pipe(struct trace_iterator *iter)
4149 {
4150         set_current_state(TASK_INTERRUPTIBLE);
4151         /* sleep for 100 msecs, and try again. */
4152         schedule_timeout(HZ / 10);
4153 }
4154
4155 /* Must be called with trace_types_lock mutex held. */
4156 static int tracing_wait_pipe(struct file *filp)
4157 {
4158         struct trace_iterator *iter = filp->private_data;
4159
4160         while (trace_empty(iter)) {
4161
4162                 if ((filp->f_flags & O_NONBLOCK)) {
4163                         return -EAGAIN;
4164                 }
4165
4166                 mutex_unlock(&iter->mutex);
4167
4168                 iter->trace->wait_pipe(iter);
4169
4170                 mutex_lock(&iter->mutex);
4171
4172                 if (signal_pending(current))
4173                         return -EINTR;
4174
4175                 /*
4176                  * We block until we read something and tracing is disabled.
4177                  * We still block if tracing is disabled, but we have never
4178                  * read anything. This allows a user to cat this file, and
4179                  * then enable tracing. But after we have read something,
4180                  * we give an EOF when tracing is again disabled.
4181                  *
4182                  * iter->pos will be 0 if we haven't read anything.
4183                  */
4184                 if (!tracing_is_on() && iter->pos)
4185                         break;
4186         }
4187
4188         return 1;
4189 }
4190
4191 /*
4192  * Consumer reader.
4193  */
4194 static ssize_t
4195 tracing_read_pipe(struct file *filp, char __user *ubuf,
4196                   size_t cnt, loff_t *ppos)
4197 {
4198         struct trace_iterator *iter = filp->private_data;
4199         struct trace_array *tr = iter->tr;
4200         ssize_t sret;
4201
4202         /* return any leftover data */
4203         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4204         if (sret != -EBUSY)
4205                 return sret;
4206
4207         trace_seq_init(&iter->seq);
4208
4209         /* copy the tracer to avoid using a global lock all around */
4210         mutex_lock(&trace_types_lock);
4211         if (unlikely(iter->trace->name != tr->current_trace->name))
4212                 *iter->trace = *tr->current_trace;
4213         mutex_unlock(&trace_types_lock);
4214
4215         /*
4216          * Avoid more than one consumer on a single file descriptor
4217          * This is just a matter of trace coherency; the ring buffer itself
4218          * is protected.
4219          */
4220         mutex_lock(&iter->mutex);
4221         if (iter->trace->read) {
4222                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4223                 if (sret)
4224                         goto out;
4225         }
4226
4227 waitagain:
4228         sret = tracing_wait_pipe(filp);
4229         if (sret <= 0)
4230                 goto out;
4231
4232         /* stop when tracing is finished */
4233         if (trace_empty(iter)) {
4234                 sret = 0;
4235                 goto out;
4236         }
4237
4238         if (cnt >= PAGE_SIZE)
4239                 cnt = PAGE_SIZE - 1;
4240
4241         /* reset all but tr, trace, and overruns */
4242         memset(&iter->seq, 0,
4243                sizeof(struct trace_iterator) -
4244                offsetof(struct trace_iterator, seq));
4245         cpumask_clear(iter->started);
4246         iter->pos = -1;
4247
4248         trace_event_read_lock();
4249         trace_access_lock(iter->cpu_file);
4250         while (trace_find_next_entry_inc(iter) != NULL) {
4251                 enum print_line_t ret;
4252                 int len = iter->seq.len;
4253
4254                 ret = print_trace_line(iter);
4255                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4256                         /* don't print partial lines */
4257                         iter->seq.len = len;
4258                         break;
4259                 }
4260                 if (ret != TRACE_TYPE_NO_CONSUME)
4261                         trace_consume(iter);
4262
4263                 if (iter->seq.len >= cnt)
4264                         break;
4265
4266                 /*
4267                  * Setting the full flag means we reached the trace_seq buffer
4268                  * size, so we should have left via the partial-line condition above.
4269                  * One of the trace_seq_* functions is not being used properly.
4270                  */
4271                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4272                           iter->ent->type);
4273         }
4274         trace_access_unlock(iter->cpu_file);
4275         trace_event_read_unlock();
4276
4277         /* Now copy what we have to the user */
4278         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4279         if (iter->seq.readpos >= iter->seq.len)
4280                 trace_seq_init(&iter->seq);
4281
4282         /*
4283          * If there was nothing to send to user, in spite of consuming trace
4284          * entries, go back to wait for more entries.
4285          */
4286         if (sret == -EBUSY)
4287                 goto waitagain;
4288
4289 out:
4290         mutex_unlock(&iter->mutex);
4291
4292         return sret;
4293 }
4294
4295 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4296                                      unsigned int idx)
4297 {
4298         __free_page(spd->pages[idx]);
4299 }
4300
4301 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4302         .can_merge              = 0,
4303         .map                    = generic_pipe_buf_map,
4304         .unmap                  = generic_pipe_buf_unmap,
4305         .confirm                = generic_pipe_buf_confirm,
4306         .release                = generic_pipe_buf_release,
4307         .steal                  = generic_pipe_buf_steal,
4308         .get                    = generic_pipe_buf_get,
4309 };
4310
4311 static size_t
4312 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4313 {
4314         size_t count;
4315         int ret;
4316
4317         /* Seq buffer is page-sized, exactly what we need. */
4318         for (;;) {
4319                 count = iter->seq.len;
4320                 ret = print_trace_line(iter);
4321                 count = iter->seq.len - count;
4322                 if (rem < count) {
4323                         rem = 0;
4324                         iter->seq.len -= count;
4325                         break;
4326                 }
4327                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4328                         iter->seq.len -= count;
4329                         break;
4330                 }
4331
4332                 if (ret != TRACE_TYPE_NO_CONSUME)
4333                         trace_consume(iter);
4334                 rem -= count;
4335                 if (!trace_find_next_entry_inc(iter))   {
4336                         rem = 0;
4337                         iter->ent = NULL;
4338                         break;
4339                 }
4340         }
4341
4342         return rem;
4343 }
4344
4345 static ssize_t tracing_splice_read_pipe(struct file *filp,
4346                                         loff_t *ppos,
4347                                         struct pipe_inode_info *pipe,
4348                                         size_t len,
4349                                         unsigned int flags)
4350 {
4351         struct page *pages_def[PIPE_DEF_BUFFERS];
4352         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4353         struct trace_iterator *iter = filp->private_data;
4354         struct splice_pipe_desc spd = {
4355                 .pages          = pages_def,
4356                 .partial        = partial_def,
4357                 .nr_pages       = 0, /* This gets updated below. */
4358                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4359                 .flags          = flags,
4360                 .ops            = &tracing_pipe_buf_ops,
4361                 .spd_release    = tracing_spd_release_pipe,
4362         };
4363         struct trace_array *tr = iter->tr;
4364         ssize_t ret;
4365         size_t rem;
4366         unsigned int i;
4367
4368         if (splice_grow_spd(pipe, &spd))
4369                 return -ENOMEM;
4370
4371         /* copy the tracer to avoid using a global lock all around */
4372         mutex_lock(&trace_types_lock);
4373         if (unlikely(iter->trace->name != tr->current_trace->name))
4374                 *iter->trace = *tr->current_trace;
4375         mutex_unlock(&trace_types_lock);
4376
4377         mutex_lock(&iter->mutex);
4378
4379         if (iter->trace->splice_read) {
4380                 ret = iter->trace->splice_read(iter, filp,
4381                                                ppos, pipe, len, flags);
4382                 if (ret)
4383                         goto out_err;
4384         }
4385
4386         ret = tracing_wait_pipe(filp);
4387         if (ret <= 0)
4388                 goto out_err;
4389
4390         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4391                 ret = -EFAULT;
4392                 goto out_err;
4393         }
4394
4395         trace_event_read_lock();
4396         trace_access_lock(iter->cpu_file);
4397
4398         /* Fill as many pages as possible. */
4399         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4400                 spd.pages[i] = alloc_page(GFP_KERNEL);
4401                 if (!spd.pages[i])
4402                         break;
4403
4404                 rem = tracing_fill_pipe_page(rem, iter);
4405
4406                 /* Copy the data into the page, so we can start over. */
4407                 ret = trace_seq_to_buffer(&iter->seq,
4408                                           page_address(spd.pages[i]),
4409                                           iter->seq.len);
4410                 if (ret < 0) {
4411                         __free_page(spd.pages[i]);
4412                         break;
4413                 }
4414                 spd.partial[i].offset = 0;
4415                 spd.partial[i].len = iter->seq.len;
4416
4417                 trace_seq_init(&iter->seq);
4418         }
4419
4420         trace_access_unlock(iter->cpu_file);
4421         trace_event_read_unlock();
4422         mutex_unlock(&iter->mutex);
4423
4424         spd.nr_pages = i;
4425
4426         ret = splice_to_pipe(pipe, &spd);
4427 out:
4428         splice_shrink_spd(&spd);
4429         return ret;
4430
4431 out_err:
4432         mutex_unlock(&iter->mutex);
4433         goto out;
4434 }
4435
4436 static ssize_t
4437 tracing_entries_read(struct file *filp, char __user *ubuf,
4438                      size_t cnt, loff_t *ppos)
4439 {
4440         struct inode *inode = file_inode(filp);
4441         struct trace_array *tr = inode->i_private;
4442         int cpu = tracing_get_cpu(inode);
4443         char buf[64];
4444         int r = 0;
4445         ssize_t ret;
4446
4447         mutex_lock(&trace_types_lock);
4448
4449         if (cpu == RING_BUFFER_ALL_CPUS) {
4450                 int cpu, buf_size_same;
4451                 unsigned long size;
4452
4453                 size = 0;
4454                 buf_size_same = 1;
4455                 /* check if all cpu sizes are the same */
4456                 for_each_tracing_cpu(cpu) {
4457                         /* fill in the size from first enabled cpu */
4458                         if (size == 0)
4459                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4460                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4461                                 buf_size_same = 0;
4462                                 break;
4463                         }
4464                 }
4465
4466                 if (buf_size_same) {
4467                         if (!ring_buffer_expanded)
4468                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4469                                             size >> 10,
4470                                             trace_buf_size >> 10);
4471                         else
4472                                 r = sprintf(buf, "%lu\n", size >> 10);
4473                 } else
4474                         r = sprintf(buf, "X\n");
4475         } else
4476                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4477
4478         mutex_unlock(&trace_types_lock);
4479
4480         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4481         return ret;
4482 }
4483
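/*
 * Writes to "buffer_size_kb" are in kilobytes per CPU. Writing to the
 * top-level file resizes every per-CPU buffer; writing to
 * per_cpu/cpuN/buffer_size_kb resizes only that CPU. For example
 * (illustrative size):
 *
 *   # echo 1408 > buffer_size_kb
 */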
4484 static ssize_t
4485 tracing_entries_write(struct file *filp, const char __user *ubuf,
4486                       size_t cnt, loff_t *ppos)
4487 {
4488         struct inode *inode = file_inode(filp);
4489         struct trace_array *tr = inode->i_private;
4490         unsigned long val;
4491         int ret;
4492
4493         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4494         if (ret)
4495                 return ret;
4496
4497         /* must have at least 1 entry */
4498         if (!val)
4499                 return -EINVAL;
4500
4501         /* value is in KB */
4502         val <<= 10;
4503         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4504         if (ret < 0)
4505                 return ret;
4506
4507         *ppos += cnt;
4508
4509         return cnt;
4510 }
4511
4512 static ssize_t
4513 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4514                                 size_t cnt, loff_t *ppos)
4515 {
4516         struct trace_array *tr = filp->private_data;
4517         char buf[64];
4518         int r, cpu;
4519         unsigned long size = 0, expanded_size = 0;
4520
4521         mutex_lock(&trace_types_lock);
4522         for_each_tracing_cpu(cpu) {
4523                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4524                 if (!ring_buffer_expanded)
4525                         expanded_size += trace_buf_size >> 10;
4526         }
4527         if (ring_buffer_expanded)
4528                 r = sprintf(buf, "%lu\n", size);
4529         else
4530                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4531         mutex_unlock(&trace_types_lock);
4532
4533         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4534 }
4535
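/*
 * The "free_buffer" file: writes are accepted but ignored; when the file
 * is released its ring buffer is resized to zero, and if the
 * stop-on-free trace option is set, tracing is turned off as well
 * (see tracing_free_buffer_release() below).
 */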
4536 static ssize_t
4537 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4538                           size_t cnt, loff_t *ppos)
4539 {
4540         /*
4541          * There is no need to read what the user has written; this function
4542          * just makes sure that there is no error when "echo" is used.
4543          */
4544
4545         *ppos += cnt;
4546
4547         return cnt;
4548 }
4549
4550 static int
4551 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4552 {
4553         struct trace_array *tr = inode->i_private;
4554
4555         /* disable tracing ? */
4556         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4557                 tracer_tracing_off(tr);
4558         /* resize the ring buffer to 0 */
4559         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4560
4561         trace_array_put(tr);
4562
4563         return 0;
4564 }
4565
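/*
 * tracing_mark_write() backs the "trace_marker" file: userspace text is
 * injected into the ring buffer as a print entry (truncated to
 * TRACE_BUF_SIZE, with a newline appended if missing), e.g.:
 *
 *   # echo "hello from userspace" > trace_marker
 */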
4566 static ssize_t
4567 tracing_mark_write(struct file *filp, const char __user *ubuf,
4568                                         size_t cnt, loff_t *fpos)
4569 {
4570         unsigned long addr = (unsigned long)ubuf;
4571         struct trace_array *tr = filp->private_data;
4572         struct ring_buffer_event *event;
4573         struct ring_buffer *buffer;
4574         struct print_entry *entry;
4575         unsigned long irq_flags;
4576         struct page *pages[2];
4577         void *map_page[2];
4578         int nr_pages = 1;
4579         ssize_t written;
4580         int offset;
4581         int size;
4582         int len;
4583         int ret;
4584         int i;
4585
4586         if (tracing_disabled)
4587                 return -EINVAL;
4588
4589         if (!(trace_flags & TRACE_ITER_MARKERS))
4590                 return -EINVAL;
4591
4592         if (cnt > TRACE_BUF_SIZE)
4593                 cnt = TRACE_BUF_SIZE;
4594
4595         /*
4596          * Userspace is injecting traces into the kernel trace buffer.
4597          * We want to be as non-intrusive as possible.
4598          * To do so, we do not want to allocate any special buffers
4599          * or take any locks, but instead write the userspace data
4600          * straight into the ring buffer.
4601          *
4602          * First we need to pin the userspace buffer into memory.
4603          * Most likely it already is, because the process just referenced it,
4604          * but there's no guarantee that it is. By using get_user_pages_fast()
4605          * and kmap_atomic/kunmap_atomic() we can get access to the
4606          * pages directly. We then write the data directly into the
4607          * ring buffer.
4608          */
4609         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4610
4611         /* check if we cross pages */
4612         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4613                 nr_pages = 2;
4614
4615         offset = addr & (PAGE_SIZE - 1);
4616         addr &= PAGE_MASK;
4617
4618         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4619         if (ret < nr_pages) {
4620                 while (--ret >= 0)
4621                         put_page(pages[ret]);
4622                 written = -EFAULT;
4623                 goto out;
4624         }
4625
4626         for (i = 0; i < nr_pages; i++)
4627                 map_page[i] = kmap_atomic(pages[i]);
4628
4629         local_save_flags(irq_flags);
4630         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4631         buffer = tr->trace_buffer.buffer;
4632         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4633                                           irq_flags, preempt_count());
4634         if (!event) {
4635                 /* Ring buffer disabled, return as if not open for write */
4636                 written = -EBADF;
4637                 goto out_unlock;
4638         }
4639
4640         entry = ring_buffer_event_data(event);
4641         entry->ip = _THIS_IP_;
4642
4643         if (nr_pages == 2) {
4644                 len = PAGE_SIZE - offset;
4645                 memcpy(&entry->buf, map_page[0] + offset, len);
4646                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4647         } else
4648                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4649
4650         if (entry->buf[cnt - 1] != '\n') {
4651                 entry->buf[cnt] = '\n';
4652                 entry->buf[cnt + 1] = '\0';
4653         } else
4654                 entry->buf[cnt] = '\0';
4655
4656         __buffer_unlock_commit(buffer, event);
4657
4658         written = cnt;
4659
4660         *fpos += written;
4661
4662  out_unlock:
4663         for (i = 0; i < nr_pages; i++) {
4664                 kunmap_atomic(map_page[i]);
4665                 put_page(pages[i]);
4666         }
4667  out:
4668         return written;
4669 }
4670
4671 static int tracing_clock_show(struct seq_file *m, void *v)
4672 {
4673         struct trace_array *tr = m->private;
4674         int i;
4675
4676         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4677                 seq_printf(m,
4678                         "%s%s%s%s", i ? " " : "",
4679                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4680                         i == tr->clock_id ? "]" : "");
4681         seq_putc(m, '\n');
4682
4683         return 0;
4684 }
4685
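/*
 * Reading "trace_clock" lists the available clocks with the current one
 * in brackets; writing a clock name switches to it and resets the
 * buffers (see below). Illustrative session (the exact clock list
 * depends on the architecture/config):
 *
 *   # cat trace_clock
 *   [local] global counter uptime perf
 *   # echo global > trace_clock
 */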
4686 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4687                                    size_t cnt, loff_t *fpos)
4688 {
4689         struct seq_file *m = filp->private_data;
4690         struct trace_array *tr = m->private;
4691         char buf[64];
4692         const char *clockstr;
4693         int i;
4694
4695         if (cnt >= sizeof(buf))
4696                 return -EINVAL;
4697
4698         if (copy_from_user(&buf, ubuf, cnt))
4699                 return -EFAULT;
4700
4701         buf[cnt] = 0;
4702
4703         clockstr = strstrip(buf);
4704
4705         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4706                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4707                         break;
4708         }
4709         if (i == ARRAY_SIZE(trace_clocks))
4710                 return -EINVAL;
4711
4712         mutex_lock(&trace_types_lock);
4713
4714         tr->clock_id = i;
4715
4716         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4717
4718         /*
4719          * New clock may not be consistent with the previous clock.
4720          * Reset the buffer so that it doesn't have incomparable timestamps.
4721          */
4722         tracing_reset_online_cpus(&tr->trace_buffer);
4723
4724 #ifdef CONFIG_TRACER_MAX_TRACE
4725         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4726                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4727         tracing_reset_online_cpus(&tr->max_buffer);
4728 #endif
4729
4730         mutex_unlock(&trace_types_lock);
4731
4732         *fpos += cnt;
4733
4734         return cnt;
4735 }
4736
4737 static int tracing_clock_open(struct inode *inode, struct file *file)
4738 {
4739         struct trace_array *tr = inode->i_private;
4740         int ret;
4741
4742         if (tracing_disabled)
4743                 return -ENODEV;
4744
4745         if (trace_array_get(tr))
4746                 return -ENODEV;
4747
4748         ret = single_open(file, tracing_clock_show, inode->i_private);
4749         if (ret < 0)
4750                 trace_array_put(tr);
4751
4752         return ret;
4753 }
4754
4755 struct ftrace_buffer_info {
4756         struct trace_iterator   iter;
4757         void                    *spare;
4758         unsigned int            read;
4759 };
4760
4761 #ifdef CONFIG_TRACER_SNAPSHOT
4762 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4763 {
4764         struct trace_array *tr = inode->i_private;
4765         struct trace_iterator *iter;
4766         struct seq_file *m;
4767         int ret = 0;
4768
4769         if (trace_array_get(tr) < 0)
4770                 return -ENODEV;
4771
4772         if (file->f_mode & FMODE_READ) {
4773                 iter = __tracing_open(inode, file, true);
4774                 if (IS_ERR(iter))
4775                         ret = PTR_ERR(iter);
4776         } else {
4777                 /* Writes still need the seq_file to hold the private data */
4778                 ret = -ENOMEM;
4779                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4780                 if (!m)
4781                         goto out;
4782                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4783                 if (!iter) {
4784                         kfree(m);
4785                         goto out;
4786                 }
4787                 ret = 0;
4788
4789                 iter->tr = tr;
4790                 iter->trace_buffer = &tr->max_buffer;
4791                 iter->cpu_file = tracing_get_cpu(inode);
4792                 m->private = iter;
4793                 file->private_data = m;
4794         }
4795 out:
4796         if (ret < 0)
4797                 trace_array_put(tr);
4798
4799         return ret;
4800 }
4801
4802 static ssize_t
4803 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4804                        loff_t *ppos)
4805 {
4806         struct seq_file *m = filp->private_data;
4807         struct trace_iterator *iter = m->private;
4808         struct trace_array *tr = iter->tr;
4809         unsigned long val;
4810         int ret;
4811
4812         ret = tracing_update_buffers();
4813         if (ret < 0)
4814                 return ret;
4815
4816         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4817         if (ret)
4818                 return ret;
4819
4820         mutex_lock(&trace_types_lock);
4821
4822         if (tr->current_trace->use_max_tr) {
4823                 ret = -EBUSY;
4824                 goto out;
4825         }
4826
4827         switch (val) {
4828         case 0:
4829                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4830                         ret = -EINVAL;
4831                         break;
4832                 }
4833                 if (tr->allocated_snapshot)
4834                         free_snapshot(tr);
4835                 break;
4836         case 1:
4837 /* Only allow per-cpu swap if the ring buffer supports it */
4838 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4839                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4840                         ret = -EINVAL;
4841                         break;
4842                 }
4843 #endif
4844                 if (!tr->allocated_snapshot) {
4845                         ret = alloc_snapshot(tr);
4846                         if (ret < 0)
4847                                 break;
4848                 }
4849                 local_irq_disable();
4850                 /* Now, we're going to swap */
4851                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4852                         update_max_tr(tr, current, smp_processor_id());
4853                 else
4854                         update_max_tr_single(tr, current, iter->cpu_file);
4855                 local_irq_enable();
4856                 break;
4857         default:
4858                 if (tr->allocated_snapshot) {
4859                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4860                                 tracing_reset_online_cpus(&tr->max_buffer);
4861                         else
4862                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4863                 }
4864                 break;
4865         }
4866
4867         if (ret >= 0) {
4868                 *ppos += cnt;
4869                 ret = cnt;
4870         }
4871 out:
4872         mutex_unlock(&trace_types_lock);
4873         return ret;
4874 }
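/*
 * The switch above maps directly onto how the "snapshot" file is used
 * from the shell (a sketch; debugfs assumed at /sys/kernel/debug):
 *
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot   # allocate (if needed) and swap
 *   # cat /sys/kernel/debug/tracing/snapshot        # read the swapped-out buffer
 *   # echo 0 > /sys/kernel/debug/tracing/snapshot   # free the snapshot buffer
 *   # echo 2 > /sys/kernel/debug/tracing/snapshot   # any other value: clear without freeing
 */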
4875
4876 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4877 {
4878         struct seq_file *m = file->private_data;
4879         int ret;
4880
4881         ret = tracing_release(inode, file);
4882
4883         if (file->f_mode & FMODE_READ)
4884                 return ret;
4885
4886         /* If write only, the seq_file is just a stub */
4887         if (m)
4888                 kfree(m->private);
4889         kfree(m);
4890
4891         return 0;
4892 }
4893
4894 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4895 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4896                                     size_t count, loff_t *ppos);
4897 static int tracing_buffers_release(struct inode *inode, struct file *file);
4898 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4899                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4900
4901 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4902 {
4903         struct ftrace_buffer_info *info;
4904         int ret;
4905
4906         ret = tracing_buffers_open(inode, filp);
4907         if (ret < 0)
4908                 return ret;
4909
4910         info = filp->private_data;
4911
4912         if (info->iter.trace->use_max_tr) {
4913                 tracing_buffers_release(inode, filp);
4914                 return -EBUSY;
4915         }
4916
4917         info->iter.snapshot = true;
4918         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4919
4920         return ret;
4921 }
4922
4923 #endif /* CONFIG_TRACER_SNAPSHOT */
4924
4925
4926 static const struct file_operations tracing_max_lat_fops = {
4927         .open           = tracing_open_generic,
4928         .read           = tracing_max_lat_read,
4929         .write          = tracing_max_lat_write,
4930         .llseek         = generic_file_llseek,
4931 };
4932
4933 static const struct file_operations set_tracer_fops = {
4934         .open           = tracing_open_generic,
4935         .read           = tracing_set_trace_read,
4936         .write          = tracing_set_trace_write,
4937         .llseek         = generic_file_llseek,
4938 };
4939
4940 static const struct file_operations tracing_pipe_fops = {
4941         .open           = tracing_open_pipe,
4942         .poll           = tracing_poll_pipe,
4943         .read           = tracing_read_pipe,
4944         .splice_read    = tracing_splice_read_pipe,
4945         .release        = tracing_release_pipe,
4946         .llseek         = no_llseek,
4947 };
4948
4949 static const struct file_operations tracing_entries_fops = {
4950         .open           = tracing_open_generic_tr,
4951         .read           = tracing_entries_read,
4952         .write          = tracing_entries_write,
4953         .llseek         = generic_file_llseek,
4954         .release        = tracing_release_generic_tr,
4955 };
4956
4957 static const struct file_operations tracing_total_entries_fops = {
4958         .open           = tracing_open_generic_tr,
4959         .read           = tracing_total_entries_read,
4960         .llseek         = generic_file_llseek,
4961         .release        = tracing_release_generic_tr,
4962 };
4963
4964 static const struct file_operations tracing_free_buffer_fops = {
4965         .open           = tracing_open_generic_tr,
4966         .write          = tracing_free_buffer_write,
4967         .release        = tracing_free_buffer_release,
4968 };
4969
4970 static const struct file_operations tracing_mark_fops = {
4971         .open           = tracing_open_generic_tr,
4972         .write          = tracing_mark_write,
4973         .llseek         = generic_file_llseek,
4974         .release        = tracing_release_generic_tr,
4975 };
4976
4977 static const struct file_operations trace_clock_fops = {
4978         .open           = tracing_clock_open,
4979         .read           = seq_read,
4980         .llseek         = seq_lseek,
4981         .release        = tracing_single_release_tr,
4982         .write          = tracing_clock_write,
4983 };
4984
4985 #ifdef CONFIG_TRACER_SNAPSHOT
4986 static const struct file_operations snapshot_fops = {
4987         .open           = tracing_snapshot_open,
4988         .read           = seq_read,
4989         .write          = tracing_snapshot_write,
4990         .llseek         = tracing_lseek,
4991         .release        = tracing_snapshot_release,
4992 };
4993
4994 static const struct file_operations snapshot_raw_fops = {
4995         .open           = snapshot_raw_open,
4996         .read           = tracing_buffers_read,
4997         .release        = tracing_buffers_release,
4998         .splice_read    = tracing_buffers_splice_read,
4999         .llseek         = no_llseek,
5000 };
5001
5002 #endif /* CONFIG_TRACER_SNAPSHOT */
5003
5004 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5005 {
5006         struct trace_array *tr = inode->i_private;
5007         struct ftrace_buffer_info *info;
5008         int ret;
5009
5010         if (tracing_disabled)
5011                 return -ENODEV;
5012
5013         if (trace_array_get(tr) < 0)
5014                 return -ENODEV;
5015
5016         info = kzalloc(sizeof(*info), GFP_KERNEL);
5017         if (!info) {
5018                 trace_array_put(tr);
5019                 return -ENOMEM;
5020         }
5021
5022         mutex_lock(&trace_types_lock);
5023
5024         info->iter.tr           = tr;
5025         info->iter.cpu_file     = tracing_get_cpu(inode);
5026         info->iter.trace        = tr->current_trace;
5027         info->iter.trace_buffer = &tr->trace_buffer;
5028         info->spare             = NULL;
5029         /* Force reading ring buffer for first read */
5030         info->read              = (unsigned int)-1;
5031
5032         filp->private_data = info;
5033
5034         mutex_unlock(&trace_types_lock);
5035
5036         ret = nonseekable_open(inode, filp);
5037         if (ret < 0)
5038                 trace_array_put(tr);
5039
5040         return ret;
5041 }
5042
5043 static unsigned int
5044 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5045 {
5046         struct ftrace_buffer_info *info = filp->private_data;
5047         struct trace_iterator *iter = &info->iter;
5048
5049         return trace_poll(iter, filp, poll_table);
5050 }
5051
5052 static ssize_t
5053 tracing_buffers_read(struct file *filp, char __user *ubuf,
5054                      size_t count, loff_t *ppos)
5055 {
5056         struct ftrace_buffer_info *info = filp->private_data;
5057         struct trace_iterator *iter = &info->iter;
5058         ssize_t ret;
5059         ssize_t size;
5060
5061         if (!count)
5062                 return 0;
5063
5064         mutex_lock(&trace_types_lock);
5065
5066 #ifdef CONFIG_TRACER_MAX_TRACE
5067         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5068                 size = -EBUSY;
5069                 goto out_unlock;
5070         }
5071 #endif
5072
5073         if (!info->spare)
5074                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5075                                                           iter->cpu_file);
5076         size = -ENOMEM;
5077         if (!info->spare)
5078                 goto out_unlock;
5079
5080         /* Do we have previous read data to read? */
5081         if (info->read < PAGE_SIZE)
5082                 goto read;
5083
5084  again:
5085         trace_access_lock(iter->cpu_file);
5086         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5087                                     &info->spare,
5088                                     count,
5089                                     iter->cpu_file, 0);
5090         trace_access_unlock(iter->cpu_file);
5091
5092         if (ret < 0) {
5093                 if (trace_empty(iter)) {
5094                         if ((filp->f_flags & O_NONBLOCK)) {
5095                                 size = -EAGAIN;
5096                                 goto out_unlock;
5097                         }
5098                         mutex_unlock(&trace_types_lock);
5099                         iter->trace->wait_pipe(iter);
5100                         mutex_lock(&trace_types_lock);
5101                         if (signal_pending(current)) {
5102                                 size = -EINTR;
5103                                 goto out_unlock;
5104                         }
5105                         goto again;
5106                 }
5107                 size = 0;
5108                 goto out_unlock;
5109         }
5110
5111         info->read = 0;
5112  read:
5113         size = PAGE_SIZE - info->read;
5114         if (size > count)
5115                 size = count;
5116
5117         ret = copy_to_user(ubuf, info->spare + info->read, size);
5118         if (ret == size) {
5119                 size = -EFAULT;
5120                 goto out_unlock;
5121         }
5122         size -= ret;
5123
5124         *ppos += size;
5125         info->read += size;
5126
5127  out_unlock:
5128         mutex_unlock(&trace_types_lock);
5129
5130         return size;
5131 }
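/*
 * Minimal userspace sketch of draining a per-cpu trace_pipe_raw file with
 * plain read(2), which lands in tracing_buffers_read() above. The path and
 * the 4096-byte page size are assumptions for illustration only:
 *
 *   #include <fcntl.h>
 *   #include <stdio.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           char page[4096];
 *           ssize_t r;
 *           int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                         O_RDONLY);
 *
 *           if (fd < 0)
 *                   return 1;
 *           // Each successful read hands back raw ring-buffer page data.
 *           while ((r = read(fd, page, sizeof(page))) > 0)
 *                   fwrite(page, 1, r, stdout);
 *           close(fd);
 *           return 0;
 *   }
 */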
5132
5133 static int tracing_buffers_release(struct inode *inode, struct file *file)
5134 {
5135         struct ftrace_buffer_info *info = file->private_data;
5136         struct trace_iterator *iter = &info->iter;
5137
5138         mutex_lock(&trace_types_lock);
5139
5140         __trace_array_put(iter->tr);
5141
5142         if (info->spare)
5143                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5144         kfree(info);
5145
5146         mutex_unlock(&trace_types_lock);
5147
5148         return 0;
5149 }
5150
5151 struct buffer_ref {
5152         struct ring_buffer      *buffer;
5153         void                    *page;
5154         int                     ref;
5155 };
5156
5157 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5158                                     struct pipe_buffer *buf)
5159 {
5160         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5161
5162         if (--ref->ref)
5163                 return;
5164
5165         ring_buffer_free_read_page(ref->buffer, ref->page);
5166         kfree(ref);
5167         buf->private = 0;
5168 }
5169
5170 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5171                                 struct pipe_buffer *buf)
5172 {
5173         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5174
5175         ref->ref++;
5176 }
5177
5178 /* Pipe buffer operations for a buffer. */
5179 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5180         .can_merge              = 0,
5181         .map                    = generic_pipe_buf_map,
5182         .unmap                  = generic_pipe_buf_unmap,
5183         .confirm                = generic_pipe_buf_confirm,
5184         .release                = buffer_pipe_buf_release,
5185         .steal                  = generic_pipe_buf_steal,
5186         .get                    = buffer_pipe_buf_get,
5187 };
5188
5189 /*
5190  * Callback from splice_to_pipe(), used to release pages still held
5191  * in the spd if we errored out while filling the pipe.
5192  */
5193 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5194 {
5195         struct buffer_ref *ref =
5196                 (struct buffer_ref *)spd->partial[i].private;
5197
5198         if (--ref->ref)
5199                 return;
5200
5201         ring_buffer_free_read_page(ref->buffer, ref->page);
5202         kfree(ref);
5203         spd->partial[i].private = 0;
5204 }
5205
5206 static ssize_t
5207 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5208                             struct pipe_inode_info *pipe, size_t len,
5209                             unsigned int flags)
5210 {
5211         struct ftrace_buffer_info *info = file->private_data;
5212         struct trace_iterator *iter = &info->iter;
5213         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5214         struct page *pages_def[PIPE_DEF_BUFFERS];
5215         struct splice_pipe_desc spd = {
5216                 .pages          = pages_def,
5217                 .partial        = partial_def,
5218                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5219                 .flags          = flags,
5220                 .ops            = &buffer_pipe_buf_ops,
5221                 .spd_release    = buffer_spd_release,
5222         };
5223         struct buffer_ref *ref;
5224         int entries, size, i;
5225         ssize_t ret;
5226
5227         mutex_lock(&trace_types_lock);
5228
5229 #ifdef CONFIG_TRACER_MAX_TRACE
5230         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5231                 ret = -EBUSY;
5232                 goto out;
5233         }
5234 #endif
5235
5236         if (splice_grow_spd(pipe, &spd)) {
5237                 ret = -ENOMEM;
5238                 goto out;
5239         }
5240
5241         if (*ppos & (PAGE_SIZE - 1)) {
5242                 ret = -EINVAL;
5243                 goto out;
5244         }
5245
5246         if (len & (PAGE_SIZE - 1)) {
5247                 if (len < PAGE_SIZE) {
5248                         ret = -EINVAL;
5249                         goto out;
5250                 }
5251                 len &= PAGE_MASK;
5252         }
5253
5254  again:
5255         trace_access_lock(iter->cpu_file);
5256         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5257
5258         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5259                 struct page *page;
5260                 int r;
5261
5262                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5263                 if (!ref)
5264                         break;
5265
5266                 ref->ref = 1;
5267                 ref->buffer = iter->trace_buffer->buffer;
5268                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5269                 if (!ref->page) {
5270                         kfree(ref);
5271                         break;
5272                 }
5273
5274                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5275                                           len, iter->cpu_file, 1);
5276                 if (r < 0) {
5277                         ring_buffer_free_read_page(ref->buffer, ref->page);
5278                         kfree(ref);
5279                         break;
5280                 }
5281
5282                 /*
5283                  * Zero out any leftover data; this page is going to
5284                  * user land.
5285                  */
5286                 size = ring_buffer_page_len(ref->page);
5287                 if (size < PAGE_SIZE)
5288                         memset(ref->page + size, 0, PAGE_SIZE - size);
5289
5290                 page = virt_to_page(ref->page);
5291
5292                 spd.pages[i] = page;
5293                 spd.partial[i].len = PAGE_SIZE;
5294                 spd.partial[i].offset = 0;
5295                 spd.partial[i].private = (unsigned long)ref;
5296                 spd.nr_pages++;
5297                 *ppos += PAGE_SIZE;
5298
5299                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5300         }
5301
5302         trace_access_unlock(iter->cpu_file);
5303         spd.nr_pages = i;
5304
5305         /* did we read anything? */
5306         if (!spd.nr_pages) {
5307                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5308                         ret = -EAGAIN;
5309                         goto out;
5310                 }
5311                 mutex_unlock(&trace_types_lock);
5312                 iter->trace->wait_pipe(iter);
5313                 mutex_lock(&trace_types_lock);
5314                 if (signal_pending(current)) {
5315                         ret = -EINTR;
5316                         goto out;
5317                 }
5318                 goto again;
5319         }
5320
5321         ret = splice_to_pipe(pipe, &spd);
5322         splice_shrink_spd(&spd);
5323 out:
5324         mutex_unlock(&trace_types_lock);
5325
5326         return ret;
5327 }
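/*
 * Userspace sketch of the zero-copy path implemented above: splice pages
 * from trace_pipe_raw into a pipe, then on into a file. Illustrative only;
 * the path is an assumption and 4096 assumes 4 KiB pages (requests shorter
 * than one page are rejected with -EINVAL, as checked above):
 *
 *   #define _GNU_SOURCE
 *   #include <fcntl.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           int raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                          O_RDONLY);
 *           int out = open("trace-cpu0.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *           int p[2];
 *           ssize_t n;
 *
 *           if (raw < 0 || out < 0 || pipe(p) < 0)
 *                   return 1;
 *           // Move whole pages without copying them through user buffers.
 *           while ((n = splice(raw, NULL, p[1], NULL, 4096, 0)) > 0)
 *                   splice(p[0], NULL, out, NULL, n, 0);
 *           return 0;
 *   }
 */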
5328
5329 static const struct file_operations tracing_buffers_fops = {
5330         .open           = tracing_buffers_open,
5331         .read           = tracing_buffers_read,
5332         .poll           = tracing_buffers_poll,
5333         .release        = tracing_buffers_release,
5334         .splice_read    = tracing_buffers_splice_read,
5335         .llseek         = no_llseek,
5336 };
5337
5338 static ssize_t
5339 tracing_stats_read(struct file *filp, char __user *ubuf,
5340                    size_t count, loff_t *ppos)
5341 {
5342         struct inode *inode = file_inode(filp);
5343         struct trace_array *tr = inode->i_private;
5344         struct trace_buffer *trace_buf = &tr->trace_buffer;
5345         int cpu = tracing_get_cpu(inode);
5346         struct trace_seq *s;
5347         unsigned long cnt;
5348         unsigned long long t;
5349         unsigned long usec_rem;
5350
5351         s = kmalloc(sizeof(*s), GFP_KERNEL);
5352         if (!s)
5353                 return -ENOMEM;
5354
5355         trace_seq_init(s);
5356
5357         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5358         trace_seq_printf(s, "entries: %ld\n", cnt);
5359
5360         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5361         trace_seq_printf(s, "overrun: %ld\n", cnt);
5362
5363         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5364         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5365
5366         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5367         trace_seq_printf(s, "bytes: %ld\n", cnt);
5368
5369         if (trace_clocks[tr->clock_id].in_ns) {
5370                 /* local or global for trace_clock */
5371                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5372                 usec_rem = do_div(t, USEC_PER_SEC);
5373                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5374                                                                 t, usec_rem);
5375
5376                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5377                 usec_rem = do_div(t, USEC_PER_SEC);
5378                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5379         } else {
5380                 /* counter or tsc mode for trace_clock */
5381                 trace_seq_printf(s, "oldest event ts: %llu\n",
5382                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5383
5384                 trace_seq_printf(s, "now ts: %llu\n",
5385                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5386         }
5387
5388         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5389         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5390
5391         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5392         trace_seq_printf(s, "read events: %ld\n", cnt);
5393
5394         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5395
5396         kfree(s);
5397
5398         return count;
5399 }
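/*
 * Example of what the per-cpu "stats" file generated above looks like when
 * the trace clock counts in nanoseconds (the numbers are made up for
 * illustration):
 *
 *   # cat /sys/kernel/debug/tracing/per_cpu/cpu0/stats
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 53280
 *   oldest event ts:  2507.271426
 *   now ts:  2512.802118
 *   dropped events: 0
 *   read events: 128
 */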
5400
5401 static const struct file_operations tracing_stats_fops = {
5402         .open           = tracing_open_generic_tr,
5403         .read           = tracing_stats_read,
5404         .llseek         = generic_file_llseek,
5405         .release        = tracing_release_generic_tr,
5406 };
5407
5408 #ifdef CONFIG_DYNAMIC_FTRACE
5409
5410 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5411 {
5412         return 0;
5413 }
5414
5415 static ssize_t
5416 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5417                   size_t cnt, loff_t *ppos)
5418 {
5419         static char ftrace_dyn_info_buffer[1024];
5420         static DEFINE_MUTEX(dyn_info_mutex);
5421         unsigned long *p = filp->private_data;
5422         char *buf = ftrace_dyn_info_buffer;
5423         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5424         int r;
5425
5426         mutex_lock(&dyn_info_mutex);
5427         r = sprintf(buf, "%ld ", *p);
5428
5429         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5430         buf[r++] = '\n';
5431
5432         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5433
5434         mutex_unlock(&dyn_info_mutex);
5435
5436         return r;
5437 }
5438
5439 static const struct file_operations tracing_dyn_info_fops = {
5440         .open           = tracing_open_generic,
5441         .read           = tracing_read_dyn_info,
5442         .llseek         = generic_file_llseek,
5443 };
5444 #endif /* CONFIG_DYNAMIC_FTRACE */
5445
5446 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5447 static void
5448 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5449 {
5450         tracing_snapshot();
5451 }
5452
5453 static void
5454 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5455 {
5456         unsigned long *count = (unsigned long *)data;
5457
5458         if (!*count)
5459                 return;
5460
5461         if (*count != -1)
5462                 (*count)--;
5463
5464         tracing_snapshot();
5465 }
5466
5467 static int
5468 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5469                       struct ftrace_probe_ops *ops, void *data)
5470 {
5471         long count = (long)data;
5472
5473         seq_printf(m, "%ps:", (void *)ip);
5474
5475         seq_printf(m, "snapshot");
5476
5477         if (count == -1)
5478                 seq_printf(m, ":unlimited\n");
5479         else
5480                 seq_printf(m, ":count=%ld\n", count);
5481
5482         return 0;
5483 }
5484
5485 static struct ftrace_probe_ops snapshot_probe_ops = {
5486         .func                   = ftrace_snapshot,
5487         .print                  = ftrace_snapshot_print,
5488 };
5489
5490 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5491         .func                   = ftrace_count_snapshot,
5492         .print                  = ftrace_snapshot_print,
5493 };
5494
5495 static int
5496 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5497                                char *glob, char *cmd, char *param, int enable)
5498 {
5499         struct ftrace_probe_ops *ops;
5500         void *count = (void *)-1;
5501         char *number;
5502         int ret;
5503
5504         /* hash funcs only work with set_ftrace_filter */
5505         if (!enable)
5506                 return -EINVAL;
5507
5508         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
5509
5510         if (glob[0] == '!') {
5511                 unregister_ftrace_function_probe_func(glob+1, ops);
5512                 return 0;
5513         }
5514
5515         if (!param)
5516                 goto out_reg;
5517
5518         number = strsep(&param, ":");
5519
5520         if (!strlen(number))
5521                 goto out_reg;
5522
5523         /*
5524          * We use the callback data field (which is a pointer)
5525          * as our counter.
5526          */
5527         ret = kstrtoul(number, 0, (unsigned long *)&count);
5528         if (ret)
5529                 return ret;
5530
5531  out_reg:
5532         ret = register_ftrace_function_probe(glob, ops, count);
5533
5534         if (ret >= 0)
5535                 alloc_snapshot(&global_trace);
5536
5537         return ret < 0 ? ret : 0;
5538 }
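/*
 * The "snapshot" command registered below is used through
 * set_ftrace_filter; a sketch (the traced function is an arbitrary
 * example):
 *
 *   # echo 'kfree:snapshot'   > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo 'kfree:snapshot:5' > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo '!kfree:snapshot'  > /sys/kernel/debug/tracing/set_ftrace_filter
 *
 * The optional :count limits how many snapshots are taken before the probe
 * goes quiet; the '!' prefix unregisters it, matching the glob handling in
 * ftrace_trace_snapshot_callback().
 */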
5539
5540 static struct ftrace_func_command ftrace_snapshot_cmd = {
5541         .name                   = "snapshot",
5542         .func                   = ftrace_trace_snapshot_callback,
5543 };
5544
5545 static __init int register_snapshot_cmd(void)
5546 {
5547         return register_ftrace_command(&ftrace_snapshot_cmd);
5548 }
5549 #else
5550 static inline __init int register_snapshot_cmd(void) { return 0; }
5551 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5552
5553 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5554 {
5555         if (tr->dir)
5556                 return tr->dir;
5557
5558         if (!debugfs_initialized())
5559                 return NULL;
5560
5561         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5562                 tr->dir = debugfs_create_dir("tracing", NULL);
5563
5564         if (!tr->dir)
5565                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5566
5567         return tr->dir;
5568 }
5569
5570 struct dentry *tracing_init_dentry(void)
5571 {
5572         return tracing_init_dentry_tr(&global_trace);
5573 }
5574
5575 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5576 {
5577         struct dentry *d_tracer;
5578
5579         if (tr->percpu_dir)
5580                 return tr->percpu_dir;
5581
5582         d_tracer = tracing_init_dentry_tr(tr);
5583         if (!d_tracer)
5584                 return NULL;
5585
5586         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5587
5588         WARN_ONCE(!tr->percpu_dir,
5589                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5590
5591         return tr->percpu_dir;
5592 }
5593
5594 static struct dentry *
5595 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5596                       void *data, long cpu, const struct file_operations *fops)
5597 {
5598         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5599
5600         if (ret) /* See tracing_get_cpu() */
5601                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5602         return ret;
5603 }
5604
5605 static void
5606 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5607 {
5608         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5609         struct dentry *d_cpu;
5610         char cpu_dir[30]; /* 30 characters should be more than enough */
5611
5612         if (!d_percpu)
5613                 return;
5614
5615         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5616         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5617         if (!d_cpu) {
5618                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5619                 return;
5620         }
5621
5622         /* per cpu trace_pipe */
5623         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5624                                 tr, cpu, &tracing_pipe_fops);
5625
5626         /* per cpu trace */
5627         trace_create_cpu_file("trace", 0644, d_cpu,
5628                                 tr, cpu, &tracing_fops);
5629
5630         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5631                                 tr, cpu, &tracing_buffers_fops);
5632
5633         trace_create_cpu_file("stats", 0444, d_cpu,
5634                                 tr, cpu, &tracing_stats_fops);
5635
5636         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5637                                 tr, cpu, &tracing_entries_fops);
5638
5639 #ifdef CONFIG_TRACER_SNAPSHOT
5640         trace_create_cpu_file("snapshot", 0644, d_cpu,
5641                                 tr, cpu, &snapshot_fops);
5642
5643         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5644                                 tr, cpu, &snapshot_raw_fops);
5645 #endif
5646 }
5647
5648 #ifdef CONFIG_FTRACE_SELFTEST
5649 /* Let selftest have access to static functions in this file */
5650 #include "trace_selftest.c"
5651 #endif
5652
5653 struct trace_option_dentry {
5654         struct tracer_opt               *opt;
5655         struct tracer_flags             *flags;
5656         struct trace_array              *tr;
5657         struct dentry                   *entry;
5658 };
5659
5660 static ssize_t
5661 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5662                         loff_t *ppos)
5663 {
5664         struct trace_option_dentry *topt = filp->private_data;
5665         char *buf;
5666
5667         if (topt->flags->val & topt->opt->bit)
5668                 buf = "1\n";
5669         else
5670                 buf = "0\n";
5671
5672         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5673 }
5674
5675 static ssize_t
5676 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5677                          loff_t *ppos)
5678 {
5679         struct trace_option_dentry *topt = filp->private_data;
5680         unsigned long val;
5681         int ret;
5682
5683         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5684         if (ret)
5685                 return ret;
5686
5687         if (val != 0 && val != 1)
5688                 return -EINVAL;
5689
5690         if (!!(topt->flags->val & topt->opt->bit) != val) {
5691                 mutex_lock(&trace_types_lock);
5692                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5693                                           topt->opt, !val);
5694                 mutex_unlock(&trace_types_lock);
5695                 if (ret)
5696                         return ret;
5697         }
5698
5699         *ppos += cnt;
5700
5701         return cnt;
5702 }
5703
5704
5705 static const struct file_operations trace_options_fops = {
5706         .open = tracing_open_generic,
5707         .read = trace_options_read,
5708         .write = trace_options_write,
5709         .llseek = generic_file_llseek,
5710 };
5711
5712 static ssize_t
5713 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5714                         loff_t *ppos)
5715 {
5716         long index = (long)filp->private_data;
5717         char *buf;
5718
5719         if (trace_flags & (1 << index))
5720                 buf = "1\n";
5721         else
5722                 buf = "0\n";
5723
5724         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5725 }
5726
5727 static ssize_t
5728 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5729                          loff_t *ppos)
5730 {
5731         struct trace_array *tr = &global_trace;
5732         long index = (long)filp->private_data;
5733         unsigned long val;
5734         int ret;
5735
5736         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5737         if (ret)
5738                 return ret;
5739
5740         if (val != 0 && val != 1)
5741                 return -EINVAL;
5742
5743         mutex_lock(&trace_types_lock);
5744         ret = set_tracer_flag(tr, 1 << index, val);
5745         mutex_unlock(&trace_types_lock);
5746
5747         if (ret < 0)
5748                 return ret;
5749
5750         *ppos += cnt;
5751
5752         return cnt;
5753 }
5754
5755 static const struct file_operations trace_options_core_fops = {
5756         .open = tracing_open_generic,
5757         .read = trace_options_core_read,
5758         .write = trace_options_core_write,
5759         .llseek = generic_file_llseek,
5760 };
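/*
 * Every name in trace_options[] becomes a boolean file under options/,
 * backed by the core read/write handlers above. A sketch ("overwrite" is
 * just one common option name):
 *
 *   # cat /sys/kernel/debug/tracing/options/overwrite
 *   1
 *   # echo 0 > /sys/kernel/debug/tracing/options/overwrite
 */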
5761
5762 struct dentry *trace_create_file(const char *name,
5763                                  umode_t mode,
5764                                  struct dentry *parent,
5765                                  void *data,
5766                                  const struct file_operations *fops)
5767 {
5768         struct dentry *ret;
5769
5770         ret = debugfs_create_file(name, mode, parent, data, fops);
5771         if (!ret)
5772                 pr_warning("Could not create debugfs '%s' entry\n", name);
5773
5774         return ret;
5775 }
5776
5777
5778 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5779 {
5780         struct dentry *d_tracer;
5781
5782         if (tr->options)
5783                 return tr->options;
5784
5785         d_tracer = tracing_init_dentry_tr(tr);
5786         if (!d_tracer)
5787                 return NULL;
5788
5789         tr->options = debugfs_create_dir("options", d_tracer);
5790         if (!tr->options) {
5791                 pr_warning("Could not create debugfs directory 'options'\n");
5792                 return NULL;
5793         }
5794
5795         return tr->options;
5796 }
5797
5798 static void
5799 create_trace_option_file(struct trace_array *tr,
5800                          struct trace_option_dentry *topt,
5801                          struct tracer_flags *flags,
5802                          struct tracer_opt *opt)
5803 {
5804         struct dentry *t_options;
5805
5806         t_options = trace_options_init_dentry(tr);
5807         if (!t_options)
5808                 return;
5809
5810         topt->flags = flags;
5811         topt->opt = opt;
5812         topt->tr = tr;
5813
5814         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5815                                     &trace_options_fops);
5816
5817 }
5818
5819 static struct trace_option_dentry *
5820 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5821 {
5822         struct trace_option_dentry *topts;
5823         struct tracer_flags *flags;
5824         struct tracer_opt *opts;
5825         int cnt;
5826
5827         if (!tracer)
5828                 return NULL;
5829
5830         flags = tracer->flags;
5831
5832         if (!flags || !flags->opts)
5833                 return NULL;
5834
5835         opts = flags->opts;
5836
5837         for (cnt = 0; opts[cnt].name; cnt++)
5838                 ;
5839
5840         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5841         if (!topts)
5842                 return NULL;
5843
5844         for (cnt = 0; opts[cnt].name; cnt++)
5845                 create_trace_option_file(tr, &topts[cnt], flags,
5846                                          &opts[cnt]);
5847
5848         return topts;
5849 }
5850
5851 static void
5852 destroy_trace_option_files(struct trace_option_dentry *topts)
5853 {
5854         int cnt;
5855
5856         if (!topts)
5857                 return;
5858
5859         for (cnt = 0; topts[cnt].opt; cnt++) {
5860                 if (topts[cnt].entry)
5861                         debugfs_remove(topts[cnt].entry);
5862         }
5863
5864         kfree(topts);
5865 }
5866
5867 static struct dentry *
5868 create_trace_option_core_file(struct trace_array *tr,
5869                               const char *option, long index)
5870 {
5871         struct dentry *t_options;
5872
5873         t_options = trace_options_init_dentry(tr);
5874         if (!t_options)
5875                 return NULL;
5876
5877         return trace_create_file(option, 0644, t_options, (void *)index,
5878                                     &trace_options_core_fops);
5879 }
5880
5881 static __init void create_trace_options_dir(struct trace_array *tr)
5882 {
5883         struct dentry *t_options;
5884         int i;
5885
5886         t_options = trace_options_init_dentry(tr);
5887         if (!t_options)
5888                 return;
5889
5890         for (i = 0; trace_options[i]; i++)
5891                 create_trace_option_core_file(tr, trace_options[i], i);
5892 }
5893
5894 static ssize_t
5895 rb_simple_read(struct file *filp, char __user *ubuf,
5896                size_t cnt, loff_t *ppos)
5897 {
5898         struct trace_array *tr = filp->private_data;
5899         char buf[64];
5900         int r;
5901
5902         r = tracer_tracing_is_on(tr);
5903         r = sprintf(buf, "%d\n", r);
5904
5905         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5906 }
5907
5908 static ssize_t
5909 rb_simple_write(struct file *filp, const char __user *ubuf,
5910                 size_t cnt, loff_t *ppos)
5911 {
5912         struct trace_array *tr = filp->private_data;
5913         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5914         unsigned long val;
5915         int ret;
5916
5917         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5918         if (ret)
5919                 return ret;
5920
5921         if (buffer) {
5922                 mutex_lock(&trace_types_lock);
5923                 if (val) {
5924                         tracer_tracing_on(tr);
5925                         if (tr->current_trace->start)
5926                                 tr->current_trace->start(tr);
5927                 } else {
5928                         tracer_tracing_off(tr);
5929                         if (tr->current_trace->stop)
5930                                 tr->current_trace->stop(tr);
5931                 }
5932                 mutex_unlock(&trace_types_lock);
5933         }
5934
5935         (*ppos)++;
5936
5937         return cnt;
5938 }
5939
5940 static const struct file_operations rb_simple_fops = {
5941         .open           = tracing_open_generic_tr,
5942         .read           = rb_simple_read,
5943         .write          = rb_simple_write,
5944         .release        = tracing_release_generic_tr,
5945         .llseek         = default_llseek,
5946 };
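/*
 * rb_simple_read()/rb_simple_write() above back the "tracing_on" file;
 * typical use (debugfs path assumed):
 *
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_on   # pause recording
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on   # resume recording
 *   # cat /sys/kernel/debug/tracing/tracing_on
 *   1
 */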
5947
5948 struct dentry *trace_instance_dir;
5949
5950 static void
5951 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5952
5953 static int
5954 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5955 {
5956         enum ring_buffer_flags rb_flags;
5957
5958         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5959
5960         buf->tr = tr;
5961
5962         buf->buffer = ring_buffer_alloc(size, rb_flags);
5963         if (!buf->buffer)
5964                 return -ENOMEM;
5965
5966         buf->data = alloc_percpu(struct trace_array_cpu);
5967         if (!buf->data) {
5968                 ring_buffer_free(buf->buffer);
5969                 return -ENOMEM;
5970         }
5971
5972         /* Allocate the first page for all buffers */
5973         set_buffer_entries(buf,
5974                            ring_buffer_size(buf->buffer, 0));
5975
5976         return 0;
5977 }
5978
5979 static int allocate_trace_buffers(struct trace_array *tr, int size)
5980 {
5981         int ret;
5982
5983         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
5984         if (ret)
5985                 return ret;
5986
5987 #ifdef CONFIG_TRACER_MAX_TRACE
5988         ret = allocate_trace_buffer(tr, &tr->max_buffer,
5989                                     allocate_snapshot ? size : 1);
5990         if (WARN_ON(ret)) {
5991                 ring_buffer_free(tr->trace_buffer.buffer);
5992                 free_percpu(tr->trace_buffer.data);
5993                 return -ENOMEM;
5994         }
5995         tr->allocated_snapshot = allocate_snapshot;
5996
5997         /*
5998          * Only the top level trace array gets its snapshot allocated
5999          * from the kernel command line.
6000          */
6001         allocate_snapshot = false;
6002 #endif
6003         return 0;
6004 }
6005
6006 static int new_instance_create(const char *name)
6007 {
6008         struct trace_array *tr;
6009         int ret;
6010
6011         mutex_lock(&trace_types_lock);
6012
6013         ret = -EEXIST;
6014         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6015                 if (tr->name && strcmp(tr->name, name) == 0)
6016                         goto out_unlock;
6017         }
6018
6019         ret = -ENOMEM;
6020         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6021         if (!tr)
6022                 goto out_unlock;
6023
6024         tr->name = kstrdup(name, GFP_KERNEL);
6025         if (!tr->name)
6026                 goto out_free_tr;
6027
6028         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6029                 goto out_free_tr;
6030
6031         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6032
6033         raw_spin_lock_init(&tr->start_lock);
6034
6035         tr->current_trace = &nop_trace;
6036
6037         INIT_LIST_HEAD(&tr->systems);
6038         INIT_LIST_HEAD(&tr->events);
6039
6040         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6041                 goto out_free_tr;
6042
6043         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6044         if (!tr->dir)
6045                 goto out_free_tr;
6046
6047         ret = event_trace_add_tracer(tr->dir, tr);
6048         if (ret) {
6049                 debugfs_remove_recursive(tr->dir);
6050                 goto out_free_tr;
6051         }
6052
6053         init_tracer_debugfs(tr, tr->dir);
6054
6055         list_add(&tr->list, &ftrace_trace_arrays);
6056
6057         mutex_unlock(&trace_types_lock);
6058
6059         return 0;
6060
6061  out_free_tr:
6062         if (tr->trace_buffer.buffer)
6063                 ring_buffer_free(tr->trace_buffer.buffer);
6064         free_cpumask_var(tr->tracing_cpumask);
6065         kfree(tr->name);
6066         kfree(tr);
6067
6068  out_unlock:
6069         mutex_unlock(&trace_types_lock);
6070
6071         return ret;
6072
6073 }
6074
6075 static int instance_delete(const char *name)
6076 {
6077         struct trace_array *tr;
6078         int found = 0;
6079         int ret;
6080
6081         mutex_lock(&trace_types_lock);
6082
6083         ret = -ENODEV;
6084         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6085                 if (tr->name && strcmp(tr->name, name) == 0) {
6086                         found = 1;
6087                         break;
6088                 }
6089         }
6090         if (!found)
6091                 goto out_unlock;
6092
6093         ret = -EBUSY;
6094         if (tr->ref)
6095                 goto out_unlock;
6096
6097         list_del(&tr->list);
6098
6099         event_trace_del_tracer(tr);
6100         debugfs_remove_recursive(tr->dir);
6101         free_percpu(tr->trace_buffer.data);
6102         ring_buffer_free(tr->trace_buffer.buffer);
6103
6104         kfree(tr->name);
6105         kfree(tr);
6106
6107         ret = 0;
6108
6109  out_unlock:
6110         mutex_unlock(&trace_types_lock);
6111
6112         return ret;
6113 }
6114
6115 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6116 {
6117         struct dentry *parent;
6118         int ret;
6119
6120         /* Paranoid: Make sure the parent is the "instances" directory */
6121         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6122         if (WARN_ON_ONCE(parent != trace_instance_dir))
6123                 return -ENOENT;
6124
6125         /*
6126          * The inode mutex is locked, but debugfs_create_dir() will also
6127          * take the mutex. As the instances directory cannot be destroyed
6128          * or changed in any other way, it is safe to unlock it, and
6129          * let the dentry try. If two users try to make the same dir at
6130          * the same time, then new_instance_create() will determine the
6131          * winner.
6132          */
6133         mutex_unlock(&inode->i_mutex);
6134
6135         ret = new_instance_create(dentry->d_iname);
6136
6137         mutex_lock(&inode->i_mutex);
6138
6139         return ret;
6140 }
6141
6142 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6143 {
6144         struct dentry *parent;
6145         int ret;
6146
6147         /* Paranoid: Make sure the parent is the "instances" directory */
6148         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6149         if (WARN_ON_ONCE(parent != trace_instance_dir))
6150                 return -ENOENT;
6151
6152         /* The caller did a dget() on dentry */
6153         mutex_unlock(&dentry->d_inode->i_mutex);
6154
6155         /*
6156          * The inode mutex is locked, but debugfs_remove_recursive() will
6157          * also take the mutex. As the instances directory cannot be
6158          * destroyed or changed in any other way, it is safe to unlock it,
6159          * and let the dentry try. If two users try to remove the same dir
6160          * at the same time, then instance_delete() will determine the
6161          * winner.
6162          */
6163         mutex_unlock(&inode->i_mutex);
6164
6165         ret = instance_delete(dentry->d_iname);
6166
6167         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6168         mutex_lock(&dentry->d_inode->i_mutex);
6169
6170         return ret;
6171 }
6172
6173 static const struct inode_operations instance_dir_inode_operations = {
6174         .lookup         = simple_lookup,
6175         .mkdir          = instance_mkdir,
6176         .rmdir          = instance_rmdir,
6177 };
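/*
 * With the mkdir/rmdir hooks above in place, trace instances are created
 * and torn down straight from the shell (a sketch; "foo" is an arbitrary
 * name):
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # ls /sys/kernel/debug/tracing/instances/foo
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 */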
6178
6179 static __init void create_trace_instances(struct dentry *d_tracer)
6180 {
6181         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6182         if (WARN_ON(!trace_instance_dir))
6183                 return;
6184
6185         /* Hijack the dir inode operations, to allow mkdir */
6186         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6187 }
6188
6189 static void
6190 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6191 {
6192         int cpu;
6193
6194         trace_create_file("tracing_cpumask", 0644, d_tracer,
6195                           tr, &tracing_cpumask_fops);
6196
6197         trace_create_file("trace_options", 0644, d_tracer,
6198                           tr, &tracing_iter_fops);
6199
6200         trace_create_file("trace", 0644, d_tracer,
6201                           tr, &tracing_fops);
6202
6203         trace_create_file("trace_pipe", 0444, d_tracer,
6204                           tr, &tracing_pipe_fops);
6205
6206         trace_create_file("buffer_size_kb", 0644, d_tracer,
6207                           tr, &tracing_entries_fops);
6208
6209         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6210                           tr, &tracing_total_entries_fops);
6211
6212         trace_create_file("free_buffer", 0200, d_tracer,
6213                           tr, &tracing_free_buffer_fops);
6214
6215         trace_create_file("trace_marker", 0220, d_tracer,
6216                           tr, &tracing_mark_fops);
6217
6218         trace_create_file("trace_clock", 0644, d_tracer, tr,
6219                           &trace_clock_fops);
6220
6221         trace_create_file("tracing_on", 0644, d_tracer,
6222                           tr, &rb_simple_fops);
6223
6224 #ifdef CONFIG_TRACER_SNAPSHOT
6225         trace_create_file("snapshot", 0644, d_tracer,
6226                           tr, &snapshot_fops);
6227 #endif
6228
6229         for_each_tracing_cpu(cpu)
6230                 tracing_init_debugfs_percpu(tr, cpu);
6231
6232 }
6233
6234 static __init int tracer_init_debugfs(void)
6235 {
6236         struct dentry *d_tracer;
6237
6238         trace_access_lock_init();
6239
6240         d_tracer = tracing_init_dentry();
6241         if (!d_tracer)
6242                 return 0;
6243
6244         init_tracer_debugfs(&global_trace, d_tracer);
6245
6246         trace_create_file("available_tracers", 0444, d_tracer,
6247                         &global_trace, &show_traces_fops);
6248
6249         trace_create_file("current_tracer", 0644, d_tracer,
6250                         &global_trace, &set_tracer_fops);
6251
6252 #ifdef CONFIG_TRACER_MAX_TRACE
6253         trace_create_file("tracing_max_latency", 0644, d_tracer,
6254                         &tracing_max_latency, &tracing_max_lat_fops);
6255 #endif
6256
6257         trace_create_file("tracing_thresh", 0644, d_tracer,
6258                         &tracing_thresh, &tracing_max_lat_fops);
6259
6260         trace_create_file("README", 0444, d_tracer,
6261                         NULL, &tracing_readme_fops);
6262
6263         trace_create_file("saved_cmdlines", 0444, d_tracer,
6264                         NULL, &tracing_saved_cmdlines_fops);
6265
6266 #ifdef CONFIG_DYNAMIC_FTRACE
6267         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6268                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6269 #endif
6270
6271         create_trace_instances(d_tracer);
6272
6273         create_trace_options_dir(&global_trace);
6274
6275         return 0;
6276 }
6277
6278 static int trace_panic_handler(struct notifier_block *this,
6279                                unsigned long event, void *unused)
6280 {
6281         if (ftrace_dump_on_oops)
6282                 ftrace_dump(ftrace_dump_on_oops);
6283         return NOTIFY_OK;
6284 }
6285
6286 static struct notifier_block trace_panic_notifier = {
6287         .notifier_call  = trace_panic_handler,
6288         .next           = NULL,
6289         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6290 };
6291
6292 static int trace_die_handler(struct notifier_block *self,
6293                              unsigned long val,
6294                              void *data)
6295 {
6296         switch (val) {
6297         case DIE_OOPS:
6298                 if (ftrace_dump_on_oops)
6299                         ftrace_dump(ftrace_dump_on_oops);
6300                 break;
6301         default:
6302                 break;
6303         }
6304         return NOTIFY_OK;
6305 }
6306
6307 static struct notifier_block trace_die_notifier = {
6308         .notifier_call = trace_die_handler,
6309         .priority = 200
6310 };
6311
6312 /*
6313  * printk is capped at 1024 characters; we really don't need it that big.
6314  * Nothing should be printing 1000 characters anyway.
6315  */
6316 #define TRACE_MAX_PRINT         1000
6317
6318 /*
6319  * Define here KERN_TRACE so that we have one place to modify
6320  * it if we decide to change what log level the ftrace dump
6321  * should be at.
6322  */
6323 #define KERN_TRACE              KERN_EMERG
6324
6325 void
6326 trace_printk_seq(struct trace_seq *s)
6327 {
6328         /* Probably should print a warning here. */
6329         if (s->len >= TRACE_MAX_PRINT)
6330                 s->len = TRACE_MAX_PRINT;
6331
6332         /* Should already be NUL-terminated, but we are paranoid. */
6333         s->buffer[s->len] = 0;
6334
6335         printk(KERN_TRACE "%s", s->buffer);
6336
6337         trace_seq_init(s);
6338 }
6339
6340 void trace_init_global_iter(struct trace_iterator *iter)
6341 {
6342         iter->tr = &global_trace;
6343         iter->trace = iter->tr->current_trace;
6344         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6345         iter->trace_buffer = &global_trace.trace_buffer;
6346
6347         if (iter->trace && iter->trace->open)
6348                 iter->trace->open(iter);
6349
6350         /* Annotate start of buffers if we had overruns */
6351         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6352                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6353
6354         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6355         if (trace_clocks[iter->tr->clock_id].in_ns)
6356                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6357 }
6358
6359 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6360 {
6361         /* use static because iter can be a bit big for the stack */
6362         static struct trace_iterator iter;
6363         static atomic_t dump_running;
6364         unsigned int old_userobj;
6365         unsigned long flags;
6366         int cnt = 0, cpu;
6367
6368         /* Only allow one dump user at a time. */
6369         if (atomic_inc_return(&dump_running) != 1) {
6370                 atomic_dec(&dump_running);
6371                 return;
6372         }
6373
6374         /*
6375          * Always turn off tracing when we dump.
6376          * We don't need to show trace output of what happens
6377          * between multiple crashes.
6378          *
6379          * If the user does a sysrq-z, then they can re-enable
6380          * tracing with echo 1 > tracing_on.
6381          */
6382         tracing_off();
6383
6384         local_irq_save(flags);
6385
6386         /* Simulate the iterator */
6387         trace_init_global_iter(&iter);
6388
6389         for_each_tracing_cpu(cpu) {
6390                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6391         }
6392
6393         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6394
6395         /* don't look at user memory in panic mode */
6396         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6397
6398         switch (oops_dump_mode) {
6399         case DUMP_ALL:
6400                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6401                 break;
6402         case DUMP_ORIG:
6403                 iter.cpu_file = raw_smp_processor_id();
6404                 break;
6405         case DUMP_NONE:
6406                 goto out_enable;
6407         default:
6408                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6409                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6410         }
6411
6412         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6413
6414         /* Did function tracer already get disabled? */
6415         if (ftrace_is_dead()) {
6416                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6417                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6418         }
6419
6420         /*
6421          * We need to stop all tracing on all CPUs to read
6422          * the next buffer. This is a bit expensive, but is
6423          * not done often. We fill all that we can read,
6424          * and then release the locks again.
6425          */
6426
6427         while (!trace_empty(&iter)) {
6428
6429                 if (!cnt)
6430                         printk(KERN_TRACE "---------------------------------\n");
6431
6432                 cnt++;
6433
6434                 /* reset all but tr, trace, and overruns */
6435                 memset(&iter.seq, 0,
6436                        sizeof(struct trace_iterator) -
6437                        offsetof(struct trace_iterator, seq));
6438                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6439                 iter.pos = -1;
6440
6441                 if (trace_find_next_entry_inc(&iter) != NULL) {
6442                         int ret;
6443
6444                         ret = print_trace_line(&iter);
6445                         if (ret != TRACE_TYPE_NO_CONSUME)
6446                                 trace_consume(&iter);
6447                 }
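                /*
                 * Dumping a large buffer with interrupts disabled can
                 * take a while; poke the NMI watchdog so it does not
                 * report a spurious hard lockup.
                 */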
6448                 touch_nmi_watchdog();
6449
6450                 trace_printk_seq(&iter.seq);
6451         }
6452
6453         if (!cnt)
6454                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6455         else
6456                 printk(KERN_TRACE "---------------------------------\n");
6457
6458  out_enable:
6459         trace_flags |= old_userobj;
6460
6461         for_each_tracing_cpu(cpu) {
6462                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6463         }
6464         atomic_dec(&dump_running);
6465         local_irq_restore(flags);
6466 }
6467 EXPORT_SYMBOL_GPL(ftrace_dump);
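/*
 * Illustrative use only (not taken from this file): other kernel code
 * could dump every CPU's buffer with
 *
 *      ftrace_dump(DUMP_ALL);
 *
 * or limit the dump to the local CPU with
 *
 *      ftrace_dump(DUMP_ORIG);
 */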
6468
6469 __init static int tracer_alloc_buffers(void)
6470 {
6471         int ring_buf_size;
6472         int ret = -ENOMEM;
6473
6474
6475         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6476                 goto out;
6477
6478         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6479                 goto out_free_buffer_mask;
6480
6481         /* Only allocate the trace_printk buffers if a trace_printk() call was compiled in */
6482         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6483                 /* Must be called before global_trace.buffer is allocated */
6484                 trace_printk_init_buffers();
6485
6486         /* To save memory, keep the ring buffer size at its minimum */
6487         if (ring_buffer_expanded)
6488                 ring_buf_size = trace_buf_size;
6489         else
6490                 ring_buf_size = 1;
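        /*
         * (A size of 1 is only a token allocation here; the ring buffer
         *  is expanded to the real trace_buf_size the first time tracing
         *  is actually used, at which point ring_buffer_expanded is set.)
         */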
6491
6492         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6493         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6494
6495         raw_spin_lock_init(&global_trace.start_lock);
6496
6497         /* TODO: make the number of buffers hot pluggable with CPUs */
6498         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6499                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6500                 WARN_ON(1);
6501                 goto out_free_cpumask;
6502         }
6503
6504         if (global_trace.buffer_disabled)
6505                 tracing_off();
6506
6507         trace_init_cmdlines();
6508
6509         /*
6510          * register_tracer() might reference current_trace, so it
6511          * needs to be set before we register anything. This is
6512          * just a bootstrap of current_trace anyway.
6513          */
6514         global_trace.current_trace = &nop_trace;
6515
6516         register_tracer(&nop_trace);
6517
6518         /* All seems OK, enable tracing */
6519         tracing_disabled = 0;
6520
6521         atomic_notifier_chain_register(&panic_notifier_list,
6522                                        &trace_panic_notifier);
6523
6524         register_die_notifier(&trace_die_notifier);
6525
6526         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6527
6528         INIT_LIST_HEAD(&global_trace.systems);
6529         INIT_LIST_HEAD(&global_trace.events);
6530         list_add(&global_trace.list, &ftrace_trace_arrays);
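        /*
         * global_trace is the top-level trace instance; trace instances
         * created later through the debugfs 'instances' directory are
         * linked onto the same ftrace_trace_arrays list.
         */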
6531
6532         while (trace_boot_options) {
6533                 char *option;
6534
6535                 option = strsep(&trace_boot_options, ",");
6536                 trace_set_options(&global_trace, option);
6537         }
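        /*
         * For example (illustrative): booting with
         * trace_options=sym-offset,stacktrace leaves trace_boot_options
         * pointing at "sym-offset,stacktrace"; the loop above splits that
         * string on ',' and applies each option via trace_set_options().
         */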
6538
6539         register_snapshot_cmd();
6540
6541         return 0;
6542
6543 out_free_cpumask:
6544         free_percpu(global_trace.trace_buffer.data);
6545 #ifdef CONFIG_TRACER_MAX_TRACE
6546         free_percpu(global_trace.max_buffer.data);
6547 #endif
6548         free_cpumask_var(global_trace.tracing_cpumask);
6549 out_free_buffer_mask:
6550         free_cpumask_var(tracing_buffer_mask);
6551 out:
6552         return ret;
6553 }
6554
6555 __init static int clear_boot_tracer(void)
6556 {
6557         /*
6558          * The default bootup tracer is named in a buffer that sits
6559          * in an init section and is freed after boot. This function
6560          * runs at late_initcall time; if the boot tracer was never
6561          * registered, clear the pointer so that a later registration
6562          * cannot access the buffer that is about to be freed.
6563          */
6564         if (!default_bootup_tracer)
6565                 return 0;
6566
6567         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6568                default_bootup_tracer);
6569         default_bootup_tracer = NULL;
6570
6571         return 0;
6572 }
6573
6574 early_initcall(tracer_alloc_buffers);
6575 fs_initcall(tracer_init_debugfs);
6576 late_initcall(clear_boot_tracer);
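/*
 * Initcall ordering: tracer_alloc_buffers() runs as an early initcall so
 * the buffers exist before most of the kernel boots, tracer_init_debugfs()
 * creates the debugfs interface once the filesystems are up, and
 * clear_boot_tracer() runs late so that a bootup tracer named on the
 * command line has had every chance to register before the check above.
 */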