Pileus Git - ~andy/linux/blob - tools/perf/builtin-top.c
perf top: Introduce --hide_{user,kernel}_symbols
[~andy/linux] / tools / perf / builtin-top.c
1 /*
2  * builtin-top.c
3  *
4  * Builtin top command: Display a continuously updated profile of
5  * any workload, CPU or specific PID.
6  *
7  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8  *
9  * Improvements and fixes by:
10  *
11  *   Arjan van de Ven <arjan@linux.intel.com>
12  *   Yanmin Zhang <yanmin.zhang@intel.com>
13  *   Wu Fengguang <fengguang.wu@intel.com>
14  *   Mike Galbraith <efault@gmx.de>
15  *   Paul Mackerras <paulus@samba.org>
16  *
17  * Released under the GPL v2. (and only v2, not any later version)
18  */
19 #include "builtin.h"
20
21 #include "perf.h"
22
23 #include "util/symbol.h"
24 #include "util/color.h"
25 #include "util/thread.h"
26 #include "util/util.h"
27 #include <linux/rbtree.h>
28 #include "util/parse-options.h"
29 #include "util/parse-events.h"
30
31 #include "util/debug.h"
32
33 #include <assert.h>
34 #include <fcntl.h>
35
36 #include <stdio.h>
37 #include <termios.h>
38 #include <unistd.h>
39
40 #include <errno.h>
41 #include <time.h>
42 #include <sched.h>
43 #include <pthread.h>
44
45 #include <sys/syscall.h>
46 #include <sys/ioctl.h>
47 #include <sys/poll.h>
48 #include <sys/prctl.h>
49 #include <sys/wait.h>
50 #include <sys/uio.h>
51 #include <sys/mman.h>
52
53 #include <linux/unistd.h>
54 #include <linux/types.h>
55
56 static int                      fd[MAX_NR_CPUS][MAX_COUNTERS];
57
58 static int                      system_wide                     =      0;
59
60 static int                      default_interval                =      0;
61
62 static int                      count_filter                    =      5;
63 static int                      print_entries;
64
65 static int                      target_pid                      =     -1;
66 static int                      inherit                         =      0;
67 static int                      profile_cpu                     =     -1;
68 static int                      nr_cpus                         =      0;
69 static unsigned int             realtime_prio                   =      0;
70 static int                      group                           =      0;
71 static unsigned int             page_size;
72 static unsigned int             mmap_pages                      =     16;
73 static int                      freq                            =   1000; /* 1 KHz */
74
75 static int                      delay_secs                      =      2;
76 static int                      zero                            =      0;
77 static int                      dump_symtab                     =      0;
78
79 static bool                     hide_kernel_symbols             =  false;
80 static bool                     hide_user_symbols               =  false;
81
82 /*
83  * Source
84  */
85
86 struct source_line {
87         u64                     eip;
88         unsigned long           count[MAX_COUNTERS];
89         char                    *line;
90         struct source_line      *next;
91 };
92
/* Name of the symbol to annotate; NULL when none was requested. */
static char *sym_filter;
/* Entry currently being annotated; NULL selects the plain symbol table view. */
struct sym_entry *sym_filter_entry;
/* Minimum per-line percentage shown in the annotation view (key 'F'). */
static int sym_pcnt_filter = 5;
/* Index of the counter used for annotation and for unweighted display (key 'E'). */
static int sym_counter;
/* Non-zero: symbols are ordered by the combined weight of all counters (key 'w'). */
static int display_weighted = -1;
98
99 /*
100  * Symbols
101  */
102
103 struct sym_entry {
104         struct rb_node          rb_node;
105         struct list_head        node;
106         unsigned long           count[MAX_COUNTERS];
107         unsigned long           snap_count;
108         double                  weight;
109         int                     skip;
110         u8                      origin;
111         struct map              *map;
112         struct source_line      *source;
113         struct source_line      *lines;
114         struct source_line      **lines_tail;
115         pthread_mutex_t         source_lock;
116 };
117
118 /*
119  * Source functions
120  */
121
/*
 * Return the height of the terminal in rows.
 *
 * Most GUI terminals set LINES (although some don't export it), so that is
 * tried first.  Otherwise the window size of file descriptor 1 is queried,
 * and if that gives nothing usable either, 25 rows are assumed.
 */
static int term_rows(void)
{
	const char *env = getenv("LINES");
	int rows = env ? atoi(env) : 0;

	if (rows > 0)
		return rows;

#ifdef TIOCGWINSZ
	{
		struct winsize ws;

		if (ioctl(1, TIOCGWINSZ, &ws) == 0 && ws.ws_row != 0)
			return ws.ws_row;
	}
#endif
	return 25;
}
139
140 static void update_print_entries(void)
141 {
142         print_entries = term_rows();
143         if (print_entries > 9)
144                 print_entries -= 9;
145 }
146
147 static void sig_winch_handler(int sig __used)
148 {
149         update_print_entries();
150 }
151
152 static void parse_source(struct sym_entry *syme)
153 {
154         struct symbol *sym;
155         struct map *map;
156         FILE *file;
157         char command[PATH_MAX*2];
158         const char *path;
159         u64 len;
160
161         if (!syme)
162                 return;
163
164         if (syme->lines) {
165                 pthread_mutex_lock(&syme->source_lock);
166                 goto out_assign;
167         }
168
169         sym = (struct symbol *)(syme + 1);
170         map = syme->map;
171         path = map->dso->long_name;
172
173         len = sym->end - sym->start;
174
175         sprintf(command,
176                 "objdump --start-address=0x%016Lx "
177                          "--stop-address=0x%016Lx -dS %s",
178                 map->unmap_ip(map, sym->start),
179                 map->unmap_ip(map, sym->end), path);
180
181         file = popen(command, "r");
182         if (!file)
183                 return;
184
185         pthread_mutex_lock(&syme->source_lock);
186         syme->lines_tail = &syme->lines;
187         while (!feof(file)) {
188                 struct source_line *src;
189                 size_t dummy = 0;
190                 char *c;
191
192                 src = malloc(sizeof(struct source_line));
193                 assert(src != NULL);
194                 memset(src, 0, sizeof(struct source_line));
195
196                 if (getline(&src->line, &dummy, file) < 0)
197                         break;
198                 if (!src->line)
199                         break;
200
201                 c = strchr(src->line, '\n');
202                 if (c)
203                         *c = 0;
204
205                 src->next = NULL;
206                 *syme->lines_tail = src;
207                 syme->lines_tail = &src->next;
208
209                 if (strlen(src->line)>8 && src->line[8] == ':') {
210                         src->eip = strtoull(src->line, NULL, 16);
211                         src->eip = map->unmap_ip(map, src->eip);
212                 }
213                 if (strlen(src->line)>8 && src->line[16] == ':') {
214                         src->eip = strtoull(src->line, NULL, 16);
215                         src->eip = map->unmap_ip(map, src->eip);
216                 }
217         }
218         pclose(file);
219 out_assign:
220         sym_filter_entry = syme;
221         pthread_mutex_unlock(&syme->source_lock);
222 }
223
224 static void __zero_source_counters(struct sym_entry *syme)
225 {
226         int i;
227         struct source_line *line;
228
229         line = syme->lines;
230         while (line) {
231                 for (i = 0; i < nr_counters; i++)
232                         line->count[i] = 0;
233                 line = line->next;
234         }
235 }
236
237 static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
238 {
239         struct source_line *line;
240
241         if (syme != sym_filter_entry)
242                 return;
243
244         if (pthread_mutex_trylock(&syme->source_lock))
245                 return;
246
247         if (!syme->source)
248                 goto out_unlock;
249
250         for (line = syme->lines; line; line = line->next) {
251                 if (line->eip == ip) {
252                         line->count[counter]++;
253                         break;
254                 }
255                 if (line->eip > ip)
256                         break;
257         }
258 out_unlock:
259         pthread_mutex_unlock(&syme->source_lock);
260 }
261
262 static void lookup_sym_source(struct sym_entry *syme)
263 {
264         struct symbol *symbol = (struct symbol *)(syme + 1);
265         struct source_line *line;
266         char pattern[PATH_MAX];
267
268         sprintf(pattern, "<%s>:", symbol->name);
269
270         pthread_mutex_lock(&syme->source_lock);
271         for (line = syme->lines; line; line = line->next) {
272                 if (strstr(line->line, pattern)) {
273                         syme->source = line;
274                         break;
275                 }
276         }
277         pthread_mutex_unlock(&syme->source_lock);
278 }
279
280 static void show_lines(struct source_line *queue, int count, int total)
281 {
282         int i;
283         struct source_line *line;
284
285         line = queue;
286         for (i = 0; i < count; i++) {
287                 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
288
289                 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
290                 line = line->next;
291         }
292 }
293
294 #define TRACE_COUNT     3
295
296 static void show_details(struct sym_entry *syme)
297 {
298         struct symbol *symbol;
299         struct source_line *line;
300         struct source_line *line_queue = NULL;
301         int displayed = 0;
302         int line_queue_count = 0, total = 0, more = 0;
303
304         if (!syme)
305                 return;
306
307         if (!syme->source)
308                 lookup_sym_source(syme);
309
310         if (!syme->source)
311                 return;
312
313         symbol = (struct symbol *)(syme + 1);
314         printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
315         printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
316
317         pthread_mutex_lock(&syme->source_lock);
318         line = syme->source;
319         while (line) {
320                 total += line->count[sym_counter];
321                 line = line->next;
322         }
323
324         line = syme->source;
325         while (line) {
326                 float pcnt = 0.0;
327
328                 if (!line_queue_count)
329                         line_queue = line;
330                 line_queue_count++;
331
332                 if (line->count[sym_counter])
333                         pcnt = 100.0 * line->count[sym_counter] / (float)total;
334                 if (pcnt >= (float)sym_pcnt_filter) {
335                         if (displayed <= print_entries)
336                                 show_lines(line_queue, line_queue_count, total);
337                         else more++;
338                         displayed += line_queue_count;
339                         line_queue_count = 0;
340                         line_queue = NULL;
341                 } else if (line_queue_count > TRACE_COUNT) {
342                         line_queue = line_queue->next;
343                         line_queue_count--;
344                 }
345
346                 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
347                 line = line->next;
348         }
349         pthread_mutex_unlock(&syme->source_lock);
350         if (more)
351                 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
352 }
353
/*
 * Symbols are put on this list by event__process_sample() and taken off
 * again by print_sym_table(), either once their sampled count has decayed
 * to zero or while their origin (user/kernel) is being hidden.
 */
static LIST_HEAD(active_symbols);
/* Taken around insertions into and removals from active_symbols. */
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
360
361 /*
362  * Ordering weight: count-1 * count-2 * ... / count-n
363  */
364 static double sym_weight(const struct sym_entry *sym)
365 {
366         double weight = sym->snap_count;
367         int counter;
368
369         if (!display_weighted)
370                 return weight;
371
372         for (counter = 1; counter < nr_counters-1; counter++)
373                 weight *= sym->count[counter];
374
375         weight /= (sym->count[counter] + 1);
376
377         return weight;
378 }
379
/* Samples accepted since the last refresh; print_sym_table() resets both. */
static long samples;
static long userspace_samples;
/* Escape sequences ESC[H ESC[2J (ESC spelled portably as \033). */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
383
384 static void __list_insert_active_sym(struct sym_entry *syme)
385 {
386         list_add(&syme->node, &active_symbols);
387 }
388
389 static void list_remove_active_sym(struct sym_entry *syme)
390 {
391         pthread_mutex_lock(&active_symbols_lock);
392         list_del_init(&syme->node);
393         pthread_mutex_unlock(&active_symbols_lock);
394 }
395
396 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
397 {
398         struct rb_node **p = &tree->rb_node;
399         struct rb_node *parent = NULL;
400         struct sym_entry *iter;
401
402         while (*p != NULL) {
403                 parent = *p;
404                 iter = rb_entry(parent, struct sym_entry, rb_node);
405
406                 if (se->weight > iter->weight)
407                         p = &(*p)->rb_left;
408                 else
409                         p = &(*p)->rb_right;
410         }
411
412         rb_link_node(&se->rb_node, parent, p);
413         rb_insert_color(&se->rb_node, tree);
414 }
415
416 static void print_sym_table(void)
417 {
418         int printed = 0, j;
419         int counter, snap = !display_weighted ? sym_counter : 0;
420         float samples_per_sec = samples/delay_secs;
421         float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
422         float sum_ksamples = 0.0;
423         struct sym_entry *syme, *n;
424         struct rb_root tmp = RB_ROOT;
425         struct rb_node *nd;
426
427         samples = userspace_samples = 0;
428
429         /* Sort the active symbols */
430         pthread_mutex_lock(&active_symbols_lock);
431         syme = list_entry(active_symbols.next, struct sym_entry, node);
432         pthread_mutex_unlock(&active_symbols_lock);
433
434         list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
435                 syme->snap_count = syme->count[snap];
436                 if (syme->snap_count != 0) {
437                         if ((hide_user_symbols &&
438                              syme->origin == PERF_RECORD_MISC_USER) ||
439                             (hide_kernel_symbols &&
440                              syme->origin == PERF_RECORD_MISC_KERNEL)) {
441                                 list_remove_active_sym(syme);
442                                 continue;
443                         }
444                         syme->weight = sym_weight(syme);
445                         rb_insert_active_sym(&tmp, syme);
446                         sum_ksamples += syme->snap_count;
447
448                         for (j = 0; j < nr_counters; j++)
449                                 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
450                 } else
451                         list_remove_active_sym(syme);
452         }
453
454         puts(CONSOLE_CLEAR);
455
456         printf(
457 "------------------------------------------------------------------------------\n");
458         printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
459                 samples_per_sec,
460                 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
461
462         if (nr_counters == 1 || !display_weighted) {
463                 printf("%Ld", (u64)attrs[0].sample_period);
464                 if (freq)
465                         printf("Hz ");
466                 else
467                         printf(" ");
468         }
469
470         if (!display_weighted)
471                 printf("%s", event_name(sym_counter));
472         else for (counter = 0; counter < nr_counters; counter++) {
473                 if (counter)
474                         printf("/");
475
476                 printf("%s", event_name(counter));
477         }
478
479         printf( "], ");
480
481         if (target_pid != -1)
482                 printf(" (target_pid: %d", target_pid);
483         else
484                 printf(" (all");
485
486         if (profile_cpu != -1)
487                 printf(", cpu: %d)\n", profile_cpu);
488         else {
489                 if (target_pid != -1)
490                         printf(")\n");
491                 else
492                         printf(", %d CPUs)\n", nr_cpus);
493         }
494
495         printf("------------------------------------------------------------------------------\n\n");
496
497         if (sym_filter_entry) {
498                 show_details(sym_filter_entry);
499                 return;
500         }
501
502         if (nr_counters == 1)
503                 printf("             samples  pcnt");
504         else
505                 printf("   weight    samples  pcnt");
506
507         if (verbose)
508                 printf("         RIP       ");
509         printf(" function                                 DSO\n");
510         printf("   %s    _______ _____",
511                nr_counters == 1 ? "      " : "______");
512         if (verbose)
513                 printf(" ________________");
514         printf(" ________________________________ ________________\n\n");
515
516         for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
517                 struct symbol *sym;
518                 double pcnt;
519
520                 syme = rb_entry(nd, struct sym_entry, rb_node);
521                 sym = (struct symbol *)(syme + 1);
522
523                 if (++printed > print_entries || (int)syme->snap_count < count_filter)
524                         continue;
525
526                 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
527                                          sum_ksamples));
528
529                 if (nr_counters == 1 || !display_weighted)
530                         printf("%20.2f ", syme->weight);
531                 else
532                         printf("%9.1f %10ld ", syme->weight, syme->snap_count);
533
534                 percent_color_fprintf(stdout, "%4.1f%%", pcnt);
535                 if (verbose)
536                         printf(" %016llx", sym->start);
537                 printf(" %-32s", sym->name);
538                 printf(" %s", syme->map->dso->short_name);
539                 printf("\n");
540         }
541 }
542
/*
 * Print "@msg: " and read one line from stdin as a non-negative decimal
 * integer into *@target.
 *
 * *@target is left untouched when reading fails, when the line contains
 * anything but digits, or when the value does not fit in an int.  An empty
 * line stores 0.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	unsigned long tmp;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() may have allocated even when it fails, so always free. */
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	tmp = strtoul(buf, NULL, 10);
	/* ~0U >> 1 is the largest int value; reject anything that would wrap. */
	if (errno == ERANGE || tmp > (unsigned long)(~0U >> 1))
		goto out_free;

	*target = (int)tmp;
out_free:
	free(buf);
}
568
/*
 * Prompt for a percentage and store it in *@target when it lies within
 * 0..100.  Input that prompt_integer() rejects leaves *@target unchanged:
 * the scratch value starts out of range so that "no input accepted" cannot
 * be mistaken for a valid 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}
577
578 static void prompt_symbol(struct sym_entry **target, const char *msg)
579 {
580         char *buf = malloc(0), *p;
581         struct sym_entry *syme = *target, *n, *found = NULL;
582         size_t dummy = 0;
583
584         /* zero counters of active symbol */
585         if (syme) {
586                 pthread_mutex_lock(&syme->source_lock);
587                 __zero_source_counters(syme);
588                 *target = NULL;
589                 pthread_mutex_unlock(&syme->source_lock);
590         }
591
592         fprintf(stdout, "\n%s: ", msg);
593         if (getline(&buf, &dummy, stdin) < 0)
594                 goto out_free;
595
596         p = strchr(buf, '\n');
597         if (p)
598                 *p = 0;
599
600         pthread_mutex_lock(&active_symbols_lock);
601         syme = list_entry(active_symbols.next, struct sym_entry, node);
602         pthread_mutex_unlock(&active_symbols_lock);
603
604         list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
605                 struct symbol *sym = (struct symbol *)(syme + 1);
606
607                 if (!strcmp(buf, sym->name)) {
608                         found = syme;
609                         break;
610                 }
611         }
612
613         if (!found) {
614                 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
615                 sleep(1);
616                 return;
617         } else
618                 parse_source(found);
619
620 out_free:
621         free(buf);
622 }
623
624 static void print_mapped_keys(void)
625 {
626         char *name = NULL;
627
628         if (sym_filter_entry) {
629                 struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
630                 name = sym->name;
631         }
632
633         fprintf(stdout, "\nMapped keys:\n");
634         fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", delay_secs);
635         fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", print_entries);
636
637         if (nr_counters > 1)
638                 fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_counter));
639
640         fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
641
642         if (vmlinux_name) {
643                 fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
644                 fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
645                 fprintf(stdout, "\t[S]     stop annotation.\n");
646         }
647
648         if (nr_counters > 1)
649                 fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
650
651         fprintf(stdout,
652                 "\t[K]     hide kernel_symbols symbols.             \t(%s)\n",
653                 hide_kernel_symbols ? "yes" : "no");
654         fprintf(stdout,
655                 "\t[U]     hide user symbols.               \t(%s)\n",
656                 hide_user_symbols ? "yes" : "no");
657         fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", zero ? 1 : 0);
658         fprintf(stdout, "\t[qQ]    quit.\n");
659 }
660
661 static int key_mapped(int c)
662 {
663         switch (c) {
664                 case 'd':
665                 case 'e':
666                 case 'f':
667                 case 'z':
668                 case 'q':
669                 case 'Q':
670                 case 'K':
671                 case 'U':
672                         return 1;
673                 case 'E':
674                 case 'w':
675                         return nr_counters > 1 ? 1 : 0;
676                 case 'F':
677                 case 's':
678                 case 'S':
679                         return vmlinux_name ? 1 : 0;
680                 default:
681                         break;
682         }
683
684         return 0;
685 }
686
687 static void handle_keypress(int c)
688 {
689         if (!key_mapped(c)) {
690                 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
691                 struct termios tc, save;
692
693                 print_mapped_keys();
694                 fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
695                 fflush(stdout);
696
697                 tcgetattr(0, &save);
698                 tc = save;
699                 tc.c_lflag &= ~(ICANON | ECHO);
700                 tc.c_cc[VMIN] = 0;
701                 tc.c_cc[VTIME] = 0;
702                 tcsetattr(0, TCSANOW, &tc);
703
704                 poll(&stdin_poll, 1, -1);
705                 c = getc(stdin);
706
707                 tcsetattr(0, TCSAFLUSH, &save);
708                 if (!key_mapped(c))
709                         return;
710         }
711
712         switch (c) {
713                 case 'd':
714                         prompt_integer(&delay_secs, "Enter display delay");
715                         if (delay_secs < 1)
716                                 delay_secs = 1;
717                         break;
718                 case 'e':
719                         prompt_integer(&print_entries, "Enter display entries (lines)");
720                         if (print_entries == 0) {
721                                 update_print_entries();
722                                 signal(SIGWINCH, sig_winch_handler);
723                         } else
724                                 signal(SIGWINCH, SIG_DFL);
725                         break;
726                 case 'E':
727                         if (nr_counters > 1) {
728                                 int i;
729
730                                 fprintf(stderr, "\nAvailable events:");
731                                 for (i = 0; i < nr_counters; i++)
732                                         fprintf(stderr, "\n\t%d %s", i, event_name(i));
733
734                                 prompt_integer(&sym_counter, "Enter details event counter");
735
736                                 if (sym_counter >= nr_counters) {
737                                         fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
738                                         sym_counter = 0;
739                                         sleep(1);
740                                 }
741                         } else sym_counter = 0;
742                         break;
743                 case 'f':
744                         prompt_integer(&count_filter, "Enter display event count filter");
745                         break;
746                 case 'F':
747                         prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
748                         break;
749                 case 'K':
750                         hide_kernel_symbols = !hide_kernel_symbols;
751                         break;
752                 case 'q':
753                 case 'Q':
754                         printf("exiting.\n");
755                         exit(0);
756                 case 's':
757                         prompt_symbol(&sym_filter_entry, "Enter details symbol");
758                         break;
759                 case 'S':
760                         if (!sym_filter_entry)
761                                 break;
762                         else {
763                                 struct sym_entry *syme = sym_filter_entry;
764
765                                 pthread_mutex_lock(&syme->source_lock);
766                                 sym_filter_entry = NULL;
767                                 __zero_source_counters(syme);
768                                 pthread_mutex_unlock(&syme->source_lock);
769                         }
770                         break;
771                 case 'U':
772                         hide_user_symbols = !hide_user_symbols;
773                         break;
774                 case 'w':
775                         display_weighted = ~display_weighted;
776                         break;
777                 case 'z':
778                         zero = ~zero;
779                         break;
780                 default:
781                         break;
782         }
783 }
784
785 static void *display_thread(void *arg __used)
786 {
787         struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
788         struct termios tc, save;
789         int delay_msecs, c;
790
791         tcgetattr(0, &save);
792         tc = save;
793         tc.c_lflag &= ~(ICANON | ECHO);
794         tc.c_cc[VMIN] = 0;
795         tc.c_cc[VTIME] = 0;
796
797 repeat:
798         delay_msecs = delay_secs * 1000;
799         tcsetattr(0, TCSANOW, &tc);
800         /* trash return*/
801         getc(stdin);
802
803         do {
804                 print_sym_table();
805         } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
806
807         c = getc(stdin);
808         tcsetattr(0, TCSAFLUSH, &save);
809
810         handle_keypress(c);
811         goto repeat;
812
813         return NULL;
814 }
815
/*
 * Names of symbols whose samples are to be skipped.  symbol_filter() sets
 * ->skip on a symbol whose name appears here.  The list is NULL-terminated.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL	/* terminator */
};
829
830 static int symbol_filter(struct map *map, struct symbol *sym)
831 {
832         struct sym_entry *syme;
833         const char *name = sym->name;
834         int i;
835
836         /*
837          * ppc64 uses function descriptors and appends a '.' to the
838          * start of every instruction address. Remove it.
839          */
840         if (name[0] == '.')
841                 name++;
842
843         if (!strcmp(name, "_text") ||
844             !strcmp(name, "_etext") ||
845             !strcmp(name, "_sinittext") ||
846             !strncmp("init_module", name, 11) ||
847             !strncmp("cleanup_module", name, 14) ||
848             strstr(name, "_text_start") ||
849             strstr(name, "_text_end"))
850                 return 1;
851
852         syme = symbol__priv(sym);
853         syme->map = map;
854         pthread_mutex_init(&syme->source_lock, NULL);
855         if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
856                 sym_filter_entry = syme;
857
858         for (i = 0; skip_symbols[i]; i++) {
859                 if (!strcmp(skip_symbols[i], name)) {
860                         syme->skip = 1;
861                         break;
862                 }
863         }
864
865         return 0;
866 }
867
868 static int parse_symbols(void)
869 {
870         if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0)
871                 return -1;
872
873         if (dump_symtab)
874                 dsos__fprintf(stderr);
875
876         return 0;
877 }
878
879 static void event__process_sample(const event_t *self, int counter)
880 {
881         u64 ip = self->ip.ip;
882         struct map *map;
883         struct sym_entry *syme;
884         struct symbol *sym;
885         u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
886
887         switch (origin) {
888         case PERF_RECORD_MISC_USER: {
889                 struct thread *thread;
890
891                 if (hide_user_symbols)
892                         return;
893
894                 thread = threads__findnew(self->ip.pid);
895                 if (thread == NULL)
896                         return;
897
898                 map = thread__find_map(thread, ip);
899                 if (map != NULL) {
900                         ip = map->map_ip(map, ip);
901                         sym = map__find_symbol(map, ip, symbol_filter);
902                         if (sym == NULL)
903                                 return;
904                         userspace_samples++;
905                         break;
906                 }
907         }
908                 /*
909                  * If this is outside of all known maps,
910                  * and is a negative address, try to look it
911                  * up in the kernel dso, as it might be a
912                  * vsyscall or vdso (which executes in user-mode).
913                  */
914                 if ((long long)ip >= 0)
915                         return;
916                 /* Fall thru */
917         case PERF_RECORD_MISC_KERNEL:
918                 if (hide_kernel_symbols)
919                         return;
920
921                 sym = kernel_maps__find_symbol(ip, &map);
922                 if (sym == NULL)
923                         return;
924                 break;
925         default:
926                 return;
927         }
928
929         syme = symbol__priv(sym);
930
931         if (!syme->skip) {
932                 syme->count[counter]++;
933                 syme->origin = origin;
934                 record_precise_ip(syme, counter, ip);
935                 pthread_mutex_lock(&active_symbols_lock);
936                 if (list_empty(&syme->node) || !syme->node.next)
937                         __list_insert_active_sym(syme);
938                 pthread_mutex_unlock(&active_symbols_lock);
939                 ++samples;
940                 return;
941         }
942 }
943
944 static void event__process_mmap(event_t *self)
945 {
946         struct thread *thread = threads__findnew(self->mmap.pid);
947
948         if (thread != NULL) {
949                 struct map *map = map__new(&self->mmap, NULL, 0);
950                 if (map != NULL)
951                         thread__insert_map(thread, map);
952         }
953 }
954
955 static void event__process_comm(event_t *self)
956 {
957         struct thread *thread = threads__findnew(self->comm.pid);
958
959         if (thread != NULL)
960                 thread__set_comm(thread, self->comm.comm);
961 }
962
963 static int event__process(event_t *event)
964 {
965         switch (event->header.type) {
966         case PERF_RECORD_COMM:
967                 event__process_comm(event);
968                 break;
969         case PERF_RECORD_MMAP:
970                 event__process_mmap(event);
971                 break;
972         default:
973                 break;
974         }
975
976         return 0;
977 }
978
/* Read-side state for one mmap'ed counter buffer. */
struct mmap_data {
	int counter;		/* counter index passed along with each sample */
	void *base;		/* start of the mapping as returned by mmap() */
	int mask;		/* data area size in bytes minus one, used to wrap offsets */
	unsigned int prev;	/* position up to which the data has been consumed */
};
985
986 static unsigned int mmap_read_head(struct mmap_data *md)
987 {
988         struct perf_event_mmap_page *pc = md->base;
989         int head;
990
991         head = pc->data_head;
992         rmb();
993
994         return head;
995 }
996
997 static void mmap_read_counter(struct mmap_data *md)
998 {
999         unsigned int head = mmap_read_head(md);
1000         unsigned int old = md->prev;
1001         unsigned char *data = md->base + page_size;
1002         int diff;
1003
1004         /*
1005          * If we're further behind than half the buffer, there's a chance
1006          * the writer will bite our tail and mess up the samples under us.
1007          *
1008          * If we somehow ended up ahead of the head, we got messed up.
1009          *
1010          * In either case, truncate and restart at head.
1011          */
1012         diff = head - old;
1013         if (diff > md->mask / 2 || diff < 0) {
1014                 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
1015
1016                 /*
1017                  * head points to a known good entry, start there.
1018                  */
1019                 old = head;
1020         }
1021
1022         for (; old != head;) {
1023                 event_t *event = (event_t *)&data[old & md->mask];
1024
1025                 event_t event_copy;
1026
1027                 size_t size = event->header.size;
1028
1029                 /*
1030                  * Event straddles the mmap boundary -- header should always
1031                  * be inside due to u64 alignment of output.
1032                  */
1033                 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1034                         unsigned int offset = old;
1035                         unsigned int len = min(sizeof(*event), size), cpy;
1036                         void *dst = &event_copy;
1037
1038                         do {
1039                                 cpy = min(md->mask + 1 - (offset & md->mask), len);
1040                                 memcpy(dst, &data[offset & md->mask], cpy);
1041                                 offset += cpy;
1042                                 dst += cpy;
1043                                 len -= cpy;
1044                         } while (len);
1045
1046                         event = &event_copy;
1047                 }
1048
1049                 if (event->header.type == PERF_RECORD_SAMPLE)
1050                         event__process_sample(event, md->counter);
1051                 else
1052                         event__process(event);
1053                 old += size;
1054         }
1055
1056         md->prev = old;
1057 }
1058
1059 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
1060 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
1061
1062 static void mmap_read(void)
1063 {
1064         int i, counter;
1065
1066         for (i = 0; i < nr_cpus; i++) {
1067                 for (counter = 0; counter < nr_counters; counter++)
1068                         mmap_read_counter(&mmap_array[i][counter]);
1069         }
1070 }
1071
/* Number of entries of event_array filled in so far. */
int nr_poll;

/*
 * Group fd handed to sys_perf_event_open(); __cmd_top() resets it to -1
 * before opening the counters of each cpu slot.
 */
int group_fd;
1074
1075 static void start_counter(int i, int counter)
1076 {
1077         struct perf_event_attr *attr;
1078         int cpu;
1079
1080         cpu = profile_cpu;
1081         if (target_pid == -1 && profile_cpu == -1)
1082                 cpu = i;
1083
1084         attr = attrs + counter;
1085
1086         attr->sample_type       = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1087
1088         if (freq) {
1089                 attr->sample_type       |= PERF_SAMPLE_PERIOD;
1090                 attr->freq              = 1;
1091                 attr->sample_freq       = freq;
1092         }
1093
1094         attr->inherit           = (cpu < 0) && inherit;
1095         attr->mmap              = 1;
1096
1097 try_again:
1098         fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0);
1099
1100         if (fd[i][counter] < 0) {
1101                 int err = errno;
1102
1103                 if (err == EPERM || err == EACCES)
1104                         die("No permission - are you root?\n");
1105                 /*
1106                  * If it's cycles then fall back to hrtimer
1107                  * based cpu-clock-tick sw counter, which
1108                  * is always available even if no PMU support:
1109                  */
1110                 if (attr->type == PERF_TYPE_HARDWARE
1111                         && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
1112
1113                         if (verbose)
1114                                 warning(" ... trying to fall back to cpu-clock-ticks\n");
1115
1116                         attr->type = PERF_TYPE_SOFTWARE;
1117                         attr->config = PERF_COUNT_SW_CPU_CLOCK;
1118                         goto try_again;
1119                 }
1120                 printf("\n");
1121                 error("perfcounter syscall returned with %d (%s)\n",
1122                         fd[i][counter], strerror(err));
1123                 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1124                 exit(-1);
1125         }
1126         assert(fd[i][counter] >= 0);
1127         fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
1128
1129         /*
1130          * First counter acts as the group leader:
1131          */
1132         if (group && group_fd == -1)
1133                 group_fd = fd[i][counter];
1134
1135         event_array[nr_poll].fd = fd[i][counter];
1136         event_array[nr_poll].events = POLLIN;
1137         nr_poll++;
1138
1139         mmap_array[i][counter].counter = counter;
1140         mmap_array[i][counter].prev = 0;
1141         mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1142         mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
1143                         PROT_READ, MAP_SHARED, fd[i][counter], 0);
1144         if (mmap_array[i][counter].base == MAP_FAILED)
1145                 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1146 }
1147
1148 static int __cmd_top(void)
1149 {
1150         pthread_t thread;
1151         int i, counter;
1152         int ret;
1153
1154         if (target_pid != -1)
1155                 event__synthesize_thread(target_pid, event__process);
1156         else
1157                 event__synthesize_threads(event__process);
1158
1159         for (i = 0; i < nr_cpus; i++) {
1160                 group_fd = -1;
1161                 for (counter = 0; counter < nr_counters; counter++)
1162                         start_counter(i, counter);
1163         }
1164
1165         /* Wait for a minimal set of events before starting the snapshot */
1166         poll(event_array, nr_poll, 100);
1167
1168         mmap_read();
1169
1170         if (pthread_create(&thread, NULL, display_thread, NULL)) {
1171                 printf("Could not create display thread.\n");
1172                 exit(-1);
1173         }
1174
1175         if (realtime_prio) {
1176                 struct sched_param param;
1177
1178                 param.sched_priority = realtime_prio;
1179                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1180                         printf("Could not set realtime priority.\n");
1181                         exit(-1);
1182                 }
1183         }
1184
1185         while (1) {
1186                 int hits = samples;
1187
1188                 mmap_read();
1189
1190                 if (hits == samples)
1191                         ret = poll(event_array, nr_poll, 100);
1192         }
1193
1194         return 0;
1195 }
1196
/* NULL-terminated usage lines handed to the option parser. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL,
};
1201
1202 static const struct option options[] = {
1203         OPT_CALLBACK('e', "event", NULL, "event",
1204                      "event selector. use 'perf list' to list available events",
1205                      parse_events),
1206         OPT_INTEGER('c', "count", &default_interval,
1207                     "event period to sample"),
1208         OPT_INTEGER('p', "pid", &target_pid,
1209                     "profile events on existing pid"),
1210         OPT_BOOLEAN('a', "all-cpus", &system_wide,
1211                             "system-wide collection from all CPUs"),
1212         OPT_INTEGER('C', "CPU", &profile_cpu,
1213                     "CPU to profile on"),
1214         OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
1215         OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
1216                     "hide kernel symbols"),
1217         OPT_INTEGER('m', "mmap-pages", &mmap_pages,
1218                     "number of mmap data pages"),
1219         OPT_INTEGER('r', "realtime", &realtime_prio,
1220                     "collect data with this RT SCHED_FIFO priority"),
1221         OPT_INTEGER('d', "delay", &delay_secs,
1222                     "number of seconds to delay between refreshes"),
1223         OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
1224                             "dump the symbol table used for profiling"),
1225         OPT_INTEGER('f', "count-filter", &count_filter,
1226                     "only display functions with more events than this"),
1227         OPT_BOOLEAN('g', "group", &group,
1228                             "put the counters into a counter group"),
1229         OPT_BOOLEAN('i', "inherit", &inherit,
1230                     "child tasks inherit counters"),
1231         OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1232                     "symbol to annotate - requires -k option"),
1233         OPT_BOOLEAN('z', "zero", &zero,
1234                     "zero history across updates"),
1235         OPT_INTEGER('F', "freq", &freq,
1236                     "profile at this frequency"),
1237         OPT_INTEGER('E', "entries", &print_entries,
1238                     "display this many functions"),
1239         OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
1240                     "hide user symbols"),
1241         OPT_BOOLEAN('v', "verbose", &verbose,
1242                     "be more verbose (show counter open errors, etc)"),
1243         OPT_END()
1244 };
1245
1246 int cmd_top(int argc, const char **argv, const char *prefix __used)
1247 {
1248         int counter;
1249
1250         symbol__init(sizeof(struct sym_entry));
1251
1252         page_size = sysconf(_SC_PAGE_SIZE);
1253
1254         argc = parse_options(argc, argv, options, top_usage, 0);
1255         if (argc)
1256                 usage_with_options(top_usage, options);
1257
1258         /* CPU and PID are mutually exclusive */
1259         if (target_pid != -1 && profile_cpu != -1) {
1260                 printf("WARNING: PID switch overriding CPU\n");
1261                 sleep(1);
1262                 profile_cpu = -1;
1263         }
1264
1265         if (!nr_counters)
1266                 nr_counters = 1;
1267
1268         if (delay_secs < 1)
1269                 delay_secs = 1;
1270
1271         parse_symbols();
1272         parse_source(sym_filter_entry);
1273
1274
1275         /*
1276          * User specified count overrides default frequency.
1277          */
1278         if (default_interval)
1279                 freq = 0;
1280         else if (freq) {
1281                 default_interval = freq;
1282         } else {
1283                 fprintf(stderr, "frequency and count are zero, aborting\n");
1284                 exit(EXIT_FAILURE);
1285         }
1286
1287         /*
1288          * Fill in the ones not specifically initialized via -c:
1289          */
1290         for (counter = 0; counter < nr_counters; counter++) {
1291                 if (attrs[counter].sample_period)
1292                         continue;
1293
1294                 attrs[counter].sample_period = default_interval;
1295         }
1296
1297         nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
1298         assert(nr_cpus <= MAX_NR_CPUS);
1299         assert(nr_cpus >= 0);
1300
1301         if (target_pid != -1 || profile_cpu != -1)
1302                 nr_cpus = 1;
1303
1304         if (print_entries == 0) {
1305                 update_print_entries();
1306                 signal(SIGWINCH, sig_winch_handler);
1307         }
1308
1309         return __cmd_top();
1310 }