4 * Builtin report command: Analyze the perf.data input file,
5 * look up and read DSOs and symbol information and display
6 * a histogram of results, along various sorting keys.
10 #include "util/util.h"
12 #include "util/color.h"
13 #include <linux/list.h>
14 #include "util/cache.h"
15 #include <linux/rbtree.h>
16 #include "util/symbol.h"
17 #include "util/string.h"
18 #include "util/callchain.h"
19 #include "util/strlist.h"
20 #include "util/values.h"
23 #include "util/debug.h"
24 #include "util/header.h"
26 #include "util/parse-options.h"
27 #include "util/parse-events.h"
29 #include "util/data_map.h"
30 #include "util/thread.h"
31 #include "util/sort.h"
32 #include "util/hist.h"
/*
 * File-scope state of the report command: the input file name, the raw
 * filter-list strings with their parsed strlist counterparts, display
 * flags, and the perf.data header and sample type that are consulted
 * while events are processed.
 */
34 static char const *input_name = "perf.data";
36 static char *dso_list_str, *comm_list_str, *sym_list_str,
38 static struct strlist *dso_list, *comm_list, *sym_list;
42 static int full_paths;
43 static int show_nr_samples;
45 static int show_threads;
46 static struct perf_read_values show_threads_values;
48 static char default_pretty_printing_style[] = "normal";
49 static char *pretty_printing_style = default_pretty_printing_style;
51 static int exclude_other = 1;
/* Default argument handed to the -g/--call-graph option callback. */
53 static char callchain_default_opt[] = "fractal,0.5";
55 static struct perf_header *header;
57 static u64 sample_type;
59 struct symbol_conf symbol_conf;
/*
 * Write the left margin that precedes a callchain line: one leading
 * string followed by left_margin further writes, one per loop pass.
 * The fprintf() return values are accumulated in ret.
 * NOTE(review): presumably ret is returned as the number of characters
 * written; the return statement is not visible here - confirm.
 */
63 callchain__fprintf_left_margin(FILE *fp, int left_margin)
68 ret = fprintf(fp, " ");
70 for (i = 0; i < left_margin; i++)
71 ret += fprintf(fp, " ");
/*
 * Print one separator line of the callchain graph: the left margin,
 * then one cell per depth level (a pipe when the matching bit of
 * depth_mask is set), then a newline.
 * NOTE(review): the padding write presumably sits in an else branch
 * that is not visible here - confirm.
 */
76 static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
82 ret += callchain__fprintf_left_margin(fp, left_margin);
84 for (i = 0; i < depth; i++)
85 if (depth_mask & (1 << i))
86 ret += fprintf(fp, "| ");
88 ret += fprintf(fp, " ");
90 ret += fprintf(fp, "\n");
/*
 * Print one callchain entry of the graph output.
 *
 * After the left margin, each depth level emits a pipe when its bit in
 * depth_mask is set. On the last level (i == depth - 1) of the first
 * entry of a node (count == 0) the share of hits is printed as a
 * coloured percentage, computed as hits * 100.0 / total_samples.
 * The entry itself is printed as the symbol name or as the raw ip.
 * NOTE(review): the choice between name and ip presumably depends on
 * chain->sym being set; that condition is not visible here - confirm.
 */
95 ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
96 int depth_mask, int count, u64 total_samples,
97 int hits, int left_margin)
102 ret += callchain__fprintf_left_margin(fp, left_margin);
103 for (i = 0; i < depth; i++) {
104 if (depth_mask & (1 << i))
105 ret += fprintf(fp, "|");
107 ret += fprintf(fp, " ");
108 if (!count && i == depth - 1) {
111 percent = hits * 100.0 / total_samples;
112 ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent);
114 ret += fprintf(fp, "%s", " ");
117 ret += fprintf(fp, "%s\n", chain->sym->name);
119 ret += fprintf(fp, "%p\n", (void *)(long)chain->ip);
/*
 * Placeholder symbol and callchain entry used to print the hits that
 * remain after filtering in relative graph mode. They are set up by
 * init_rem_hits() and the symbol is freed in output__fprintf().
 */
124 static struct symbol *rem_sq_bracket;
125 static struct callchain_list rem_hits;
/*
 * Allocate the placeholder symbol named "[...]" and attach it to
 * rem_hits. The 6 extra bytes hold the five-character name plus its
 * terminating NUL. A failed allocation is reported on stderr.
 * NOTE(review): presumably the function returns right after the error
 * message; that line is not visible here - confirm.
 */
127 static void init_rem_hits(void)
129 rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
130 if (!rem_sq_bracket) {
131 fprintf(stderr, "Not enough memory to display remaining hits\n");
135 strcpy(rem_sq_bracket->name, "[...]");
136 rem_hits.sym = rem_sq_bracket;
/*
 * Recursively print the children of a callchain node as a graph.
 *
 * In CHAIN_GRAPH_REL mode the percentages of the children are taken
 * relative to self->children_hit, while total_samples is used as the
 * base in the other assignment. For each child of self->rb_root a
 * separator line is printed with the unchanged depth_mask, then the
 * entries of the child, then the function recurses with the bit for
 * the current depth added to the mask. In relative mode, when some
 * hits remain and they differ from new_total, a final "[...]" entry
 * (rem_hits) is printed carrying the remaining hits.
 * NOTE(review): remaining is presumably reduced by the cumulative hits
 * of each printed child, and children below the minimum percentage are
 * presumably skipped; neither line is visible here - confirm.
 */
140 __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
141 u64 total_samples, int depth, int depth_mask,
144 struct rb_node *node, *next;
145 struct callchain_node *child;
146 struct callchain_list *chain;
147 int new_depth_mask = depth_mask;
153 if (callchain_param.mode == CHAIN_GRAPH_REL)
154 new_total = self->children_hit;
156 new_total = total_samples;
158 remaining = new_total;
160 node = rb_first(&self->rb_root);
164 child = rb_entry(node, struct callchain_node, rb_node);
165 cumul = cumul_hits(child);
169 * The depth mask manages the output of pipes that show
170 * the depth. We don't want to keep the pipes of the current
171 * level for the last child of this depth.
172 * Except if we have remaining filtered hits. They will
173 * supersede the last child
175 next = rb_next(node);
176 if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
177 new_depth_mask &= ~(1 << (depth - 1));
180 * But we keep the older depth mask for the line seperator
181 * to keep the level link until we reach the last child
183 ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
186 list_for_each_entry(chain, &child->val, list) {
187 if (chain->ip >= PERF_CONTEXT_MAX)
189 ret += ipchain__fprintf_graph(fp, chain, depth,
195 ret += __callchain__fprintf_graph(fp, child, new_total,
197 new_depth_mask | (1 << depth),
202 if (callchain_param.mode == CHAIN_GRAPH_REL &&
203 remaining && remaining != new_total) {
208 new_depth_mask &= ~(1 << (depth - 1));
210 ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
211 new_depth_mask, 0, new_total,
212 remaining, left_margin);
/*
 * Print one callchain in graph form.
 *
 * The entries of the root node (self->val) are printed first, each as
 * a symbol name or a raw ip, with a "|" line and a "---" lead-in drawn
 * after the left margin. The children are then printed through
 * __callchain__fprintf_graph() starting at depth 1 with depth mask 1.
 * NOTE(review): entries with ip >= PERF_CONTEXT_MAX are presumably
 * skipped, and the first entry is presumably skipped when sorting by
 * symbol; the statements under those tests are not visible - confirm.
 */
220 callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
221 u64 total_samples, int left_margin)
223 struct callchain_list *chain;
224 bool printed = false;
228 list_for_each_entry(chain, &self->val, list) {
229 if (chain->ip >= PERF_CONTEXT_MAX)
232 if (!i++ && sort__first_dimension == SORT_SYM)
236 ret += callchain__fprintf_left_margin(fp, left_margin);
237 ret += fprintf(fp, "|\n");
238 ret += callchain__fprintf_left_margin(fp, left_margin);
239 ret += fprintf(fp, "---");
244 ret += callchain__fprintf_left_margin(fp, left_margin);
247 ret += fprintf(fp, " %s\n", chain->sym->name);
249 ret += fprintf(fp, " %p\n", (void *)(long)chain->ip);
252 ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin);
/*
 * Print one callchain in flat form. The function recurses on
 * self->parent before printing the entries of self->val, so ancestors
 * come out first. Each entry is printed as a symbol name or a raw ip.
 * NOTE(review): the guard that ends the recursion and the handling of
 * entries with ip >= PERF_CONTEXT_MAX are not visible here - confirm.
 */
258 callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
261 struct callchain_list *chain;
267 ret += callchain__fprintf_flat(fp, self->parent, total_samples);
270 list_for_each_entry(chain, &self->val, list) {
271 if (chain->ip >= PERF_CONTEXT_MAX)
274 ret += fprintf(fp, " %s\n", chain->sym->name);
276 ret += fprintf(fp, " %p\n",
277 (void *)(long)chain->ip);
/*
 * Print every callchain attached to a histogram entry by walking
 * self->sorted_chain from rb_first() with rb_next().
 *
 * For each chain the share is chain->hit * 100.0 / total_samples and
 * the output routine is picked from callchain_param.mode: a percentage
 * line followed by the flat printer, or the graph printer for
 * CHAIN_GRAPH_ABS and CHAIN_GRAPH_REL. A newline follows each chain.
 * NOTE(review): the case label in front of the flat branch is not
 * visible here; presumably CHAIN_FLAT - confirm.
 */
284 hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
285 u64 total_samples, int left_margin)
287 struct rb_node *rb_node;
288 struct callchain_node *chain;
291 rb_node = rb_first(&self->sorted_chain);
295 chain = rb_entry(rb_node, struct callchain_node, rb_node);
296 percent = chain->hit * 100.0 / total_samples;
297 switch (callchain_param.mode) {
299 ret += percent_color_fprintf(fp, " %6.2f%%\n",
301 ret += callchain__fprintf_flat(fp, chain, total_samples);
303 case CHAIN_GRAPH_ABS: /* Falldown */
304 case CHAIN_GRAPH_REL:
305 ret += callchain__fprintf_graph(fp, chain, total_samples,
311 ret += fprintf(fp, "\n");
312 rb_node = rb_next(rb_node);
/*
 * Print one row of the histogram.
 *
 * The row starts with the overhead, either as a coloured percentage
 * (self->count * 100.0 / total_samples) or as the raw count. With
 * show_nr_samples set the sample count follows. Every sort entry then
 * prints its column, separated by field_sep or a space, and a newline
 * ends the row. The callchains of the entry are printed afterwards;
 * when the first sort dimension is SORT_COMM the left margin is the
 * first column width minus the length of the thread comm.
 * NOTE(review): with exclude_other set, entries without a parent are
 * presumably skipped; the statement under that test is not visible.
 * NOTE(review): the fprintf() results for the sample-count column and
 * the separator are not added to ret in the visible lines.
 */
319 hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
321 struct sort_entry *se;
324 if (exclude_other && !self->parent)
328 ret = percent_color_fprintf(fp,
329 field_sep ? "%.2f" : " %6.2f%%",
330 (self->count * 100.0) / total_samples);
332 ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count);
334 if (show_nr_samples) {
336 fprintf(fp, "%c%lld", *field_sep, self->count);
338 fprintf(fp, "%11lld", self->count);
341 list_for_each_entry(se, &hist_entry__sort_list, list) {
345 fprintf(fp, "%s", field_sep ?: " ");
346 ret += se->print(fp, self, se->width ? *se->width : 0);
349 ret += fprintf(fp, "\n");
354 if (sort__first_dimension == SORT_COMM) {
355 se = list_first_entry(&hist_entry__sort_list, typeof(*se),
357 left_margin = se->width ? *se->width : 0;
358 left_margin -= thread__comm_len(self->thread);
361 hist_entry_callchain__fprintf(fp, self, total_samples,
/*
 * Widen dsos__col_width so that the name of this dso fits.
 *
 * Nothing is widened when fixed column widths (col_width_list_str) or
 * a field separator were requested, or when a dso filter list exists
 * and does not contain this name. slen_calculated is set afterwards,
 * which the caller in resolve_symbol() tests before calling again.
 */
372 static void dso__calc_col_width(struct dso *self)
374 if (!col_width_list_str && !field_sep &&
375 (!dso_list || strlist__has_entry(dso_list, self->name))) {
376 unsigned int slen = strlen(self->name);
377 if (slen > dsos__col_width)
378 dsos__col_width = slen;
381 self->slen_calculated = 1;
/*
 * Widen the comm and thread columns so that the comm of this thread
 * fits: comms__col_width becomes the comm length and
 * threads__col_width that length plus 6.
 *
 * Nothing is widened when fixed column widths or a field separator
 * were requested, or when a comm filter list exists and does not
 * contain this comm.
 */
384 static void thread__comm_adjust(struct thread *self)
386 char *comm = self->comm;
388 if (!col_width_list_str && !field_sep &&
389 (!comm_list || strlist__has_entry(comm_list, comm))) {
390 unsigned int slen = strlen(comm);
392 if (slen > comms__col_width) {
393 comms__col_width = slen;
394 threads__col_width = slen + 6;
/*
 * Set the comm of a thread through thread__set_comm() and then adjust
 * the column widths with thread__comm_adjust().
 * NOTE(review): presumably a non-zero ret is returned before the
 * adjustment; the check is not visible here - confirm.
 */
399 static int thread__set_comm_adjust(struct thread *self, const char *comm)
401 int ret = thread__set_comm(self, comm);
406 thread__comm_adjust(self);
/*
 * Resolve an instruction pointer of a thread to a symbol.
 *
 * The map comes from *mapp when mapp is given, and a MAP__FUNCTION
 * lookup through thread__find_map() is also available. With a map in
 * hand, the dso column width is computed once per dso (unless the dso
 * column is elided) and ip is translated with map->map_ip(). An ip
 * that is negative when viewed as signed is looked up with
 * kernel_maps__find_function() instead. The final lookup is
 * map__find_symbol(), or NULL when there is no map.
 * NOTE(review): the conditions choosing between these paths, and any
 * write-back through mapp or ipp, are not visible here - confirm.
 */
412 static struct symbol *
413 resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp)
415 struct map *map = mapp ? *mapp : NULL;
424 map = thread__find_map(thread, MAP__FUNCTION, ip);
427 * We have to do this here as we may have a dso
428 * with no symbol hit that has a name longer than
429 * the ones with symbols sampled.
431 if (!sort_dso.elide && !map->dso->slen_calculated)
432 dso__calc_col_width(map->dso);
437 ip = map->map_ip(map, ip);
440 * If this is outside of all known maps,
441 * and is a negative address, try to look it
442 * up in the kernel dso, as it might be a
443 * vsyscall or vdso (which executes in user-mode).
445 * XXX This is nasty, we should have a symbol list in
446 * the "[vdso]" dso, but for now lets use the old
447 * trick of looking in the whole kernel symbol list.
449 if ((long long)ip < 0)
450 return kernel_maps__find_function(ip, mapp, NULL);
452 dump_printf(" ...... dso: %s\n",
453 map ? map->dso->long_name : "<not found>");
454 dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
457 return map ? map__find_symbol(map, ip, NULL) : NULL;
/*
 * Test a symbol name against the compiled parent_regex. regexec()
 * returns 0 on a match, so the condition is true for matching names.
 * NOTE(review): sym is dereferenced without a NULL check in the
 * visible lines, and the return statements are not visible - confirm.
 */
460 static int call__match(struct symbol *sym)
462 if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
/*
 * Resolve every address of a callchain to a symbol.
 *
 * An array of chain->nr symbol pointers is allocated with calloc(), and
 * an allocation failure is reported on stderr. Entries with
 * ip >= PERF_CONTEXT_MAX are context markers rather than addresses.
 * Addresses are resolved with kernel_maps__find_function() in the
 * PERF_CONTEXT_KERNEL case and with resolve_symbol() otherwise. When
 * sorting by parent and *parent is still unset, each resolved symbol
 * is tested with call__match().
 * NOTE(review): presumably the matching symbol is stored in *parent
 * and the array is returned to the caller, who owns it; those lines
 * are not visible here - confirm.
 */
468 static struct symbol **resolve_callchain(struct thread *thread,
469 struct ip_callchain *chain,
470 struct symbol **parent)
472 u64 context = PERF_CONTEXT_MAX;
473 struct symbol **syms = NULL;
477 syms = calloc(chain->nr, sizeof(*syms));
479 fprintf(stderr, "Can't allocate memory for symbols\n");
484 for (i = 0; i < chain->nr; i++) {
485 u64 ip = chain->ips[i];
486 struct symbol *sym = NULL;
488 if (ip >= PERF_CONTEXT_MAX) {
494 case PERF_CONTEXT_HV:
496 case PERF_CONTEXT_KERNEL:
497 sym = kernel_maps__find_function(ip, NULL, NULL);
500 sym = resolve_symbol(thread, NULL, &ip);
505 if (sort__has_parent && !*parent && call__match(sym))
/*
 * Add one sample to the histogram.
 *
 * When a chain is present and either parent sorting or callchain
 * output is active, the chain is resolved to symbols first, which may
 * also yield the parent symbol. The entry is then found or created by
 * __hist_entry__add(), and the chain is appended to the callchain of
 * the entry.
 * NOTE(review): callchain_init() presumably runs only for a newly
 * created entry (see the hit flag), and syms is presumably freed
 * afterwards; neither line is visible here - confirm.
 */
517 * collect histogram counts
521 hist_entry__add(struct thread *thread, struct map *map,
522 struct symbol *sym, u64 ip, struct ip_callchain *chain,
523 char level, u64 count)
525 struct symbol **syms = NULL, *parent = NULL;
527 struct hist_entry *he;
529 if ((sort__has_parent || callchain) && chain)
530 syms = resolve_callchain(thread, chain, &parent);
532 he = __hist_entry__add(thread, map, sym, parent,
533 ip, count, level, &hit);
542 callchain_init(&he->callchain);
543 append_chain(&he->callchain, chain, syms);
/*
 * Print the whole report to fp.
 *
 * The output starts with the sample total and the column headers: the
 * "Overhead" column, an optional "Samples" column, and one header per
 * sort entry, either joined by field_sep or padded to the column
 * width. When col_width_list_str is given, the widths are read from
 * it item by item, moving on at each comma. A dotted underline
 * follows, then every entry of output_hists is printed with
 * hist_entry__fprintf(). A hint is printed when both the sort order
 * and the parent pattern are still at their defaults. The "[...]"
 * placeholder symbol is freed and the per-thread values are displayed.
 * NOTE(review): atoi() gives no error indication for a malformed
 * width; strtol() with an end pointer would allow validation.
 * NOTE(review): several of these steps sit under conditions that are
 * not visible here (raw printing style, show_threads) - confirm.
 */
550 static size_t output__fprintf(FILE *fp, u64 total_samples)
552 struct hist_entry *pos;
553 struct sort_entry *se;
557 char *col_width = col_width_list_str;
558 int raw_printing_style;
560 raw_printing_style = !strcmp(pretty_printing_style, "raw");
564 fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
567 fprintf(fp, "# Overhead");
568 if (show_nr_samples) {
570 fprintf(fp, "%cSamples", *field_sep);
572 fputs(" Samples ", fp);
574 list_for_each_entry(se, &hist_entry__sort_list, list) {
578 fprintf(fp, "%c%s", *field_sep, se->header);
581 width = strlen(se->header);
583 if (col_width_list_str) {
585 *se->width = atoi(col_width);
586 col_width = strchr(col_width, ',');
591 width = *se->width = max(*se->width, width);
593 fprintf(fp, " %*s", width, se->header);
600 fprintf(fp, "# ........");
602 fprintf(fp, " ..........");
603 list_for_each_entry(se, &hist_entry__sort_list, list) {
613 width = strlen(se->header);
614 for (i = 0; i < width; i++)
622 for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
623 pos = rb_entry(nd, struct hist_entry, rb_node);
624 ret += hist_entry__fprintf(fp, pos, total_samples);
627 if (sort_order == default_sort_order &&
628 parent_pattern == default_parent_pattern) {
630 fprintf(fp, "# (For a higher level overview, try: perf report --sort comm,dso)\n");
635 free(rem_sq_bracket);
638 perf_read_values_display(fp, &show_threads_values,
/*
 * Check that a callchain fits inside its event.
 *
 * chain_size is the event size minus the offset of ip.__more_data
 * within the event, and the chain is rejected when chain->nr entries
 * of sizeof(u64) would exceed it. The caller treats a negative result
 * as a malformed event.
 * NOTE(review): chain->nr * sizeof(u64) is not checked for wrap-around,
 * so a very large nr could pass; comparing nr against
 * chain_size / sizeof(u64) would avoid the multiplication.
 * NOTE(review): the bound does not subtract the nr field itself, nor
 * the period value that process_sample_event() reads ahead of the
 * chain, so it is looser than the real space available - confirm.
 */
644 static int validate_chain(struct ip_callchain *chain, event_t *event)
646 unsigned int chain_size;
648 chain_size = event->header.size;
649 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
651 if (chain->nr*sizeof(u64) > chain_size)
/*
 * Handle one sample event.
 *
 * The thread is looked up (or created) from event->ip.pid. The data
 * after the fixed part of the event is decoded in order: a u64 period
 * when sample_type has PERF_SAMPLE_PERIOD, then the callchain when it
 * has PERF_SAMPLE_CALLCHAIN. The chain is validated and dumped. The
 * comm, dso and symbol filter lists are then tested. The symbol is
 * resolved by cpu mode: kernel_maps__find_function() for kernel mode,
 * resolve_symbol() for user mode, and a hypervisor line in the dump
 * output otherwise. The sample is then added to the histogram and the
 * period to event__stats.total.
 * NOTE(review): more_data is a void pointer, so the += on it relies on
 * the GNU C extension for arithmetic on void pointers.
 * NOTE(review): the statements under the filter tests and the error
 * paths (presumably returns) are not visible here - confirm.
 */
657 static int process_sample_event(event_t *event)
660 struct symbol *sym = NULL;
661 u64 ip = event->ip.ip;
663 struct map *map = NULL;
664 void *more_data = event->ip.__more_data;
665 struct ip_callchain *chain = NULL;
667 struct thread *thread = threads__findnew(event->ip.pid);
669 if (sample_type & PERF_SAMPLE_PERIOD) {
670 period = *(u64 *)more_data;
671 more_data += sizeof(u64);
674 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
676 event->ip.pid, event->ip.tid,
680 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
683 chain = (void *)more_data;
685 dump_printf("... chain: nr:%Lu\n", chain->nr);
687 if (validate_chain(chain, event) < 0) {
688 pr_debug("call-chain problem with event, "
694 for (i = 0; i < chain->nr; i++)
695 dump_printf("..... %2d: %016Lx\n", i, chain->ips[i]);
699 if (thread == NULL) {
700 pr_debug("problem processing %d event, skipping it.\n",
705 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
707 if (comm_list && !strlist__has_entry(comm_list, thread->comm))
710 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
712 if (cpumode == PERF_RECORD_MISC_KERNEL) {
714 sym = kernel_maps__find_function(ip, &map, NULL);
715 dump_printf(" ...... dso: %s\n",
716 map ? map->dso->long_name : "<not found>");
717 } else if (cpumode == PERF_RECORD_MISC_USER) {
719 sym = resolve_symbol(thread, &map, &ip);
723 dump_printf(" ...... dso: [hypervisor]\n");
727 (!map || !map->dso ||
728 !(strlist__has_entry(dso_list, map->dso->short_name) ||
729 (map->dso->short_name != map->dso->long_name &&
730 strlist__has_entry(dso_list, map->dso->long_name)))))
733 if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
736 if (hist_entry__add(thread, map, sym, ip,
737 chain, level, period)) {
738 pr_debug("problem incrementing symbol count, skipping event\n");
742 event__stats.total += period;
/*
 * Handle a comm event: look up (or create) the thread for
 * event->comm.pid and give it the new comm through
 * thread__set_comm_adjust(), which also updates the column widths.
 * A missing thread or a failed update is reported in the dump output.
 * NOTE(review): the return values are not visible here - confirm.
 */
747 static int process_comm_event(event_t *event)
749 struct thread *thread = threads__findnew(event->comm.pid);
751 dump_printf(": %s:%d\n", event->comm.comm, event->comm.pid);
753 if (thread == NULL ||
754 thread__set_comm_adjust(thread, event->comm.comm)) {
755 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
/*
 * Handle a read event: find the event attribute for event->read.id in
 * the file header, record the value in show_threads_values under the
 * pid/tid of the event, and dump the event. In the dump output an
 * unknown attribute is shown as "FAIL".
 * NOTE(review): recording presumably happens only when show_threads is
 * set; that condition is not visible here - confirm.
 */
762 static int process_read_event(event_t *event)
764 struct perf_event_attr *attr;
766 attr = perf_header__find_attr(event->read.id, header);
769 const char *name = attr ? __event_name(attr->type, attr->config)
771 perf_read_values_add_value(&show_threads_values,
772 event->read.pid, event->read.tid,
778 dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid,
779 attr ? __event_name(attr->type, attr->config) : "FAIL",
/*
 * Check the sample type of the file against the requested options.
 *
 * Without PERF_SAMPLE_CALLCHAIN in sample_type, a request for
 * --sort parent or for -g is reported on stderr, since the file has no
 * callchain data. With callchain data present, a callchain mode other
 * than CHAIN_NONE and callchain output not yet enabled, the callchain
 * parameters are registered; a failure there is reported on stderr.
 * NOTE(review): the global sample_type is presumably assigned from the
 * type argument first; that line and the returns are not visible.
 */
785 static int sample_type_check(u64 type)
789 if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
790 if (sort__has_parent) {
791 fprintf(stderr, "selected --sort parent, but no"
792 " callchain data. Did you call"
793 " perf record without -g?\n");
797 fprintf(stderr, "selected -g but no callchain data."
798 " Did you call perf record without"
802 } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
804 if (register_callchain_param(&callchain_param) < 0) {
805 fprintf(stderr, "Can't register callchain"
/*
 * Event callbacks for reading the perf.data file, registered in
 * __cmd_report(). Sample, comm and read events use the handlers of
 * this file; mmap, exit, fork and lost events use the shared event__
 * handlers. Exit and fork share event__process_task.
 */
814 static struct perf_file_handler file_handler = {
815 .process_sample_event = process_sample_event,
816 .process_mmap_event = event__process_mmap,
817 .process_comm_event = process_comm_event,
818 .process_exit_event = event__process_task,
819 .process_fork_event = event__process_task,
820 .process_lost_event = event__process_lost,
821 .process_read_event = process_read_event,
822 .sample_type_check = sample_type_check,
/*
 * Run the report.
 *
 * The idle thread is registered and its comm is accounted for in the
 * column widths. The per-thread values are initialised, the file
 * handler is registered and the input file is dispatched through
 * mmap_dispatch_perf_file(). Afterwards the event totals, threads and
 * dsos can be printed, the histogram is re-sorted and printed to
 * stdout, and the per-thread values are destroyed.
 * NOTE(review): the conditions around the optional steps (presumably
 * dump_trace, verbose, show_threads) and the handling of ret are not
 * visible here - confirm.
 */
826 static int __cmd_report(void)
831 idle = register_idle_thread();
832 thread__comm_adjust(idle);
835 perf_read_values_init(&show_threads_values);
837 register_perf_file_handler(&file_handler);
839 ret = mmap_dispatch_perf_file(&header, input_name, force,
840 full_paths, &event__cwdlen, &event__cwd);
845 event__print_totals();
850 threads__fprintf(stdout);
853 dsos__fprintf(stdout);
856 output__resort(event__stats.total);
857 output__fprintf(stdout, event__stats.total);
860 perf_read_values_destroy(&show_threads_values);
/*
 * Option callback for -g/--call-graph. The argument has the form
 * "output_type,min_percent".
 *
 * The first token selects the mode: "graph" gives CHAIN_GRAPH_ABS,
 * "flat" gives CHAIN_FLAT, "fractal" gives CHAIN_GRAPH_REL and "none"
 * gives CHAIN_NONE. The second token, when present, is parsed with
 * strtod() as the minimum percentage. The parameters are then
 * registered, and a failure is reported on stderr.
 * NOTE(review): strtok() writes into arg, which is declared const and
 * is cast here; it also keeps static state between calls.
 * NOTE(review): the strncmp() length is strlen(arg), not the length of
 * the keyword, so a prefix of a mode name also compares equal and the
 * first matching branch wins.
 */
866 parse_callchain_opt(const struct option *opt __used, const char *arg,
877 tok = strtok((char *)arg, ",");
881 /* get the output mode */
882 if (!strncmp(tok, "graph", strlen(arg)))
883 callchain_param.mode = CHAIN_GRAPH_ABS;
885 else if (!strncmp(tok, "flat", strlen(arg)))
886 callchain_param.mode = CHAIN_FLAT;
888 else if (!strncmp(tok, "fractal", strlen(arg)))
889 callchain_param.mode = CHAIN_GRAPH_REL;
891 else if (!strncmp(tok, "none", strlen(arg))) {
892 callchain_param.mode = CHAIN_NONE;
901 /* get the min percentage */
902 tok = strtok(NULL, ",");
906 callchain_param.min_percent = strtod(tok, &endptr);
911 if (register_callchain_param(&callchain_param) < 0) {
912 fprintf(stderr, "Can't register callchain params\n");
/*
 * Usage text handed to parse_options() and usage_with_options().
 * The array has external linkage; the static form of the declaration
 * is left commented out on the line below.
 */
918 //static const char * const report_usage[] = {
919 const char * const report_usage[] = {
920 "perf report [<options>] <command>",
/*
 * Command-line option table of the report command, consumed by
 * parse_options() in cmd_report() and by usage_with_options().
 * Most entries store straight into a file-scope variable; the
 * -g/--call-graph entry goes through parse_callchain_opt() with
 * callchain_default_opt as its default argument.
 */
924 static const struct option options[] = {
925 OPT_STRING('i', "input", &input_name, "file",
927 OPT_BOOLEAN('v', "verbose", &verbose,
928 "be more verbose (show symbol address, etc)"),
929 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
930 "dump raw trace in ASCII"),
931 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
932 "file", "vmlinux pathname"),
933 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
934 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
935 "load module symbols - WARNING: use only with -k and LIVE kernel"),
936 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
937 "Show a column with the number of samples"),
938 OPT_BOOLEAN('T', "threads", &show_threads,
939 "Show per-thread event counters"),
940 OPT_STRING(0, "pretty", &pretty_printing_style, "key",
941 "pretty printing style key: normal raw"),
942 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
943 "sort by key(s): pid, comm, dso, symbol, parent"),
944 OPT_BOOLEAN('P', "full-paths", &full_paths,
945 "Don't shorten the pathnames taking into account the cwd"),
946 OPT_STRING('p', "parent", &parent_pattern, "regex",
947 "regex filter to identify parent, see: '--sort parent'"),
948 OPT_BOOLEAN('x', "exclude-other", &exclude_other,
949 "Only display entries with parent-match"),
950 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent",
951 "Display callchains using output_type and min percent threshold. "
952 "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt),
953 OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
954 "only consider symbols in these dsos"),
955 OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
956 "only consider symbols in these comms"),
957 OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]",
958 "only consider these symbols"),
959 OPT_STRING('w', "column-widths", &col_width_list_str,
961 "don't try to adjust column width, use these fixed values"),
962 OPT_STRING('t', "field-separator", &field_sep, "separator",
963 "separator for columns, no spaces will be added between "
964 "columns '.' is reserved."),
/*
 * Register the sort keys named in sort_order.
 *
 * A copy of sort_order is split on commas and spaces with strtok_r()
 * and each token is passed to sort_dimension__add(). An unknown key
 * is reported with error() and the usage text is printed.
 * NOTE(review): the strdup() result is used without a NULL check in
 * the visible lines, and the matching free() is not visible - confirm.
 */
968 static void setup_sorting(void)
970 char *tmp, *tok, *str = strdup(sort_order);
972 for (tok = strtok_r(str, ", ", &tmp);
973 tok; tok = strtok_r(NULL, ", ", &tmp)) {
974 if (sort_dimension__add(tok) < 0) {
975 error("Unknown --sort key: `%s'", tok);
976 usage_with_options(report_usage, options);
/*
 * Build a filter list from its command-line string.
 *
 * list_str is parsed into *list with strlist__new(), and a parsing
 * problem is reported on stderr together with list_name. When the
 * list holds exactly one entry, that entry is printed to fp as a
 * "# name: value" line.
 * NOTE(review): the guard for an unset list_str and the use of the se
 * argument (presumably to elide a single-valued column) are not
 * visible here - confirm.
 */
983 static void setup_list(struct strlist **list, const char *list_str,
984 struct sort_entry *se, const char *list_name,
988 *list = strlist__new(true, list_str);
990 fprintf(stderr, "problems parsing %s list\n",
994 if (strlist__nr_entries(*list) == 1) {
995 fprintf(fp, "# %s: %s\n", list_name,
996 strlist__entry(*list, 0)->s);
/*
 * Entry point of "perf report".
 *
 * The symbol subsystem is initialised and the command line is parsed.
 * When a parent pattern other than the default was given, the
 * "parent" sort dimension is added and its column is elided. Any
 * leftover arguments lead to the usage text. The dso, comm and symbol
 * filter lists are set up, a field separator of '.' is rejected, and
 * __cmd_report() does the rest.
 * NOTE(review): the error returns and the call that sets up sorting
 * are not visible here - confirm.
 */
1002 int cmd_report(int argc, const char **argv, const char *prefix __used)
1004 if (symbol__init(&symbol_conf) < 0)
1007 argc = parse_options(argc, argv, options, report_usage, 0);
1011 if (parent_pattern != default_parent_pattern) {
1012 sort_dimension__add("parent");
1013 sort_parent.elide = 1;
1018 * Any (unrecognized) arguments left?
1021 usage_with_options(report_usage, options);
1025 setup_list(&dso_list, dso_list_str, &sort_dso, "dso", stdout);
1026 setup_list(&comm_list, comm_list_str, &sort_comm, "comm", stdout);
1027 setup_list(&sym_list, sym_list_str, &sort_sym, "symbol", stdout);
1029 if (field_sep && *field_sep == '.') {
1030 fputs("'.' is the only non valid --field-separator argument\n",
1035 return __cmd_report();