]> Pileus Git - ~andy/linux/blob - tools/perf/builtin-record.c
Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[~andy/linux] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 #include "util/data.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Fallback on_exit() for C libraries that lack it: a fixed-size table
 * of (handler, arg) pairs drained from a single atexit() trampoline.
 */
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/*
 * atexit() callbacks get no arguments, so capture the status passed to
 * exit() in a global that the trampoline forwards to each handler.
 */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to run at exit with (exit status, @arg).
 * Returns 0 on success, -ENOMEM when the table is full.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);	/* hook libc exit exactly once */
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit() trampoline: run registered handlers in registration order. */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
64
/* State for one invocation of 'perf record'. */
struct perf_record {
	struct perf_tool	tool;		/* event-processing callbacks */
	struct perf_record_opts opts;		/* recording options from the command line */
	u64			bytes_written;	/* event payload bytes written to output */
	struct perf_data_file	file;		/* the perf.data output file */
	struct perf_evlist	*evlist;	/* events being recorded */
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used by psignal() reporting */
	int			realtime_prio;	/* nonzero: SCHED_FIFO priority to request */
	bool			no_buildid;	/* skip build-id collection */
	bool			no_buildid_cache; /* don't update ~/.debug buildid cache */
	long			samples;	/* mmap drains that found data */
};
78
79 static int do_write_output(struct perf_record *rec, void *buf, size_t size)
80 {
81         struct perf_data_file *file = &rec->file;
82
83         while (size) {
84                 ssize_t ret = write(file->fd, buf, size);
85
86                 if (ret < 0) {
87                         pr_err("failed to write perf data, error: %m\n");
88                         return -1;
89                 }
90
91                 size -= ret;
92                 buf += ret;
93
94                 rec->bytes_written += ret;
95         }
96
97         return 0;
98 }
99
/* Single choke point for all output writes; see do_write_output(). */
static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	return do_write_output(rec, buf, size);
}
104
105 static int process_synthesized_event(struct perf_tool *tool,
106                                      union perf_event *event,
107                                      struct perf_sample *sample __maybe_unused,
108                                      struct machine *machine __maybe_unused)
109 {
110         struct perf_record *rec = container_of(tool, struct perf_record, tool);
111         if (write_output(rec, event, event->header.size) < 0)
112                 return -1;
113
114         return 0;
115 }
116
/*
 * Drain one mmap'ed ring buffer into the output file.
 *
 * Copies everything between our consumer position (md->prev) and the
 * kernel's head pointer; a region that wraps past the end of the ring
 * is written in two chunks. On success the consumer tail is advanced
 * so the kernel can reuse the space.
 * Returns 0 on success (including "nothing to read"), -1 on write error.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data starts after the control page */
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* ring is empty */

	rec->samples++;

	size = head - old;

	/* Region wraps around the end of the ring: flush the tail part first. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Flush the (remaining) contiguous part. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);	/* publish the new consumer position */

out:
	return rc;
}
160
static volatile int done = 0;		/* set from signals: stop the record loop */
static volatile int signr = -1;		/* signal that terminated us, -1 = none */
static volatile int child_finished = 0;	/* SIGCHLD seen: the workload has exited */

/*
 * Handler for SIGCHLD/SIGINT/SIGUSR1/SIGTERM: just record what
 * happened and let the main loop wind down cleanly.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
173
/*
 * on_exit() handler: ensure a forked workload is terminated and reaped,
 * then restore the fatal signal's default disposition (the caller
 * re-raises it so our exit status reflects the signal).
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	/* Normal exit, or SIGUSR1 (treated as a clean stop): nothing to restore. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}
193
/*
 * Configure and open every counter in the evlist, apply event filters,
 * and mmap the ring buffers. When the kernel rejects an event config,
 * perf_evsel__fallback() may rewrite it (e.g. HW cycles -> SW clock)
 * and the open is retried.
 * Returns 0 on success, -errno or -1 on failure.
 */
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			/* fallback may pick a weaker but available event; retry */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	/* id header size depends on the sample_type of the opened events */
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
249
250 static int process_buildids(struct perf_record *rec)
251 {
252         struct perf_data_file *file  = &rec->file;
253         struct perf_session *session = rec->session;
254         u64 start = session->header.data_offset;
255
256         u64 size = lseek(file->fd, 0, SEEK_CUR);
257         if (size == 0)
258                 return 0;
259
260         return __perf_session__process_events(session, start,
261                                               size - start,
262                                               size, &build_id__mark_dso_hit_ops);
263 }
264
/*
 * on_exit() handler run at process exit: on a successful run, patch
 * the final data size into the file header, collect build-ids and tear
 * down the session. Pipe output has no seekable header, so there is
 * nothing to fix up in that case.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;
	struct perf_data_file *file = &rec->file;

	if (status != 0)
		return;		/* failed run: leave the output as-is */

	if (!file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		/* rewrite the header, now with the real sizes (at_exit=true) */
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
285
/*
 * machines__process_guests() callback: synthesize module and kernel
 * mmap events for one guest machine so its samples can be resolved.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
317
/*
 * Marker event appended after each full pass over the mmap buffers;
 * consumers use it to re-order events that arrive interleaved across
 * per-cpu rings.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
322
323 static int perf_record__mmap_read_all(struct perf_record *rec)
324 {
325         int i;
326         int rc = 0;
327
328         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
329                 if (rec->evlist->mmap[i].base) {
330                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
331                                 rc = -1;
332                                 goto out;
333                         }
334                 }
335         }
336
337         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
338                 rc = write_output(rec, &finished_round_event,
339                                   sizeof(finished_round_event));
340
341 out:
342         return rc;
343 }
344
345 static void perf_record__init_features(struct perf_record *rec)
346 {
347         struct perf_evlist *evsel_list = rec->evlist;
348         struct perf_session *session = rec->session;
349         int feat;
350
351         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
352                 perf_header__set_feat(&session->header, feat);
353
354         if (rec->no_buildid)
355                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
356
357         if (!have_tracepoints(&evsel_list->entries))
358                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
359
360         if (!rec->opts.branch_stack)
361                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
362 }
363
/*
 * The heart of 'perf record': open counters, write the perf.data
 * header, synthesize pre-existing state (kernel/module/thread maps),
 * optionally start the workload, then loop draining the mmap rings
 * until told to stop.
 *
 * Returns 0 on success, negative on error. Final header fix-up and
 * session teardown happen in the perf_record__exit() on_exit handler,
 * not here.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	int err;
	unsigned long waking = 0;	/* how many times poll() woke us */
	const bool forks = argc > 0;	/* a workload command was given */
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	perf_record__init_features(rec);

	/*
	 * Fork the workload now (held stopped) so its pid exists before
	 * the counters are opened; it is released further down.
	 */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, file->is_pipe,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_delete_session;
	} else {
		/* placeholder header; rewritten with final sizes at exit */
		err = perf_session__write_header(session, evsel_list,
						 file->fd, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	machine = &session->machines.host;

	/*
	 * In pipe mode there is no header to carry the attrs and tracing
	 * data, so synthesize them as events into the stream instead.
	 */
	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			/* err is the number of tracing-data bytes written */
			rec->bytes_written += err;
		}
	}

	/* kernel mmap event: try _text first, fall back to _stext */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	/* synthesize MMAP/COMM events for threads that already exist */
	err = __machine__synthesize_threads(machine, tool, &opts->target, evsel_list->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;	/* detect whether this pass read anything */

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		/* nothing read: sleep until the kernel has data or a signal hits */
		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	/* SIGUSR1 is a "stop quietly" request, treated like --quiet */
	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		file->path,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
564
/* Helpers for building the branch_modes[] table below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

/* Maps a -b/-j branch-filter token to its PERF_SAMPLE_BRANCH_* bit. */
struct branch_mode {
	const char *name;
	int mode;
};
574
/* Tokens accepted by -b/--branch-any and -j/--branch-filter. */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};
588
/*
 * Parse a comma-separated branch filter list (e.g. "any_call,u") into
 * a PERF_SAMPLE_BRANCH_* bit mask stored at opt->value.
 *
 * The option may be given only once. A missing list, or one containing
 * only privilege-level tokens (u/k/hv), defaults to sampling any
 * taken branch. Returns 0 on success, -1 on an unknown token or a
 * repeated option.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			/* look the token up in the branch_modes table */
			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}
651
#ifdef HAVE_LIBUNWIND_SUPPORT
/*
 * Parse @str as the requested stack dump size, round it up to a u64
 * multiple and store it in *@_size.
 * Returns 0 on success; -1 (with an error message) when the string has
 * trailing junk, the value is zero, or it exceeds the maximum.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;		/* non-numeric trailing characters */

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	/* %lu: max_size is unsigned long; the old %ld printed it as signed */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_LIBUNWIND_SUPPORT */
679
/*
 * Parse the --call-graph argument: "fp", or (with libunwind support)
 * "dwarf[,dump_size]". Sets opts->call_graph and, for dwarf,
 * opts->stack_dump_size. Returns 0 on success, -ENOMEM on allocation
 * failure, -1 on bad input.
 */
int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		/* NB: sizeof("fp") includes the NUL, so this is an exact match */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef HAVE_LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			/* optional second token overrides the dump size */
			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* HAVE_LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}
735
/* Log the chosen call-graph configuration at debug verbosity. */
static void callchain_debug(struct perf_record_opts *opts)
{
	pr_debug("callchain: type %d\n", opts->call_graph);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}
744
/*
 * --call-graph option callback: parse the mode argument, or disable
 * call graphs entirely for --no-call-graph.
 */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct perf_record_opts *opts = opt->value;
	int ret;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}
765
/*
 * Bare -g callback (takes no argument): enable call-graph recording
 * with the default frame-pointer mode unless --call-graph already
 * picked a mode.
 */
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct perf_record_opts *opts = opt->value;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}
778
/* Usage strings shown by parse_options() for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
784
/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		/* UINT_MAX/ULLONG_MAX == "not set by the user" sentinels */
		.mmap_pages          = UINT_MAX,
		.user_freq           = UINT_MAX,
		.user_interval       = ULLONG_MAX,
		.freq                = 4000,	/* default sampling frequency (Hz) */
		.target              = {
			.uses_mmap   = true,
		},
	},
};

#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* advertise "dwarf" mode only when built with libunwind */
#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif
814
815 /*
816  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
817  * with it and switch to use the library functions in perf_evlist that came
818  * from builtin-record.c, i.e. use perf_record_opts,
819  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
820  * using pipes, etc.
821  */
/*
 * Command-line option table for 'perf record', handed to parse_options()
 * in cmd_record() below.  Each OPT_* entry binds a switch to a field of
 * the file-scope 'record' state (struct perf_record) or to a parser
 * callback; the table is terminated by OPT_END().
 */
const struct option record_options[] = {
	/* Event selection and filtering. */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	/* Target selection: existing pid/tid, all CPUs, or a CPU list. */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	/* Sampling parameters and output file. */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/*
	 * -g takes no argument (enables call-graph recording with defaults);
	 * --call-graph takes the mode and optional dump size.
	 */
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	/* Build-id handling (see disable_buildid_cache() in cmd_record()). */
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* Branch-stack sampling: -b is a no-arg shorthand, -j takes a mask. */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu,
		    "force the use of per-cpu mmaps"),
	OPT_END()
};
895
896 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
897 {
898         int err = -ENOMEM;
899         struct perf_evlist *evsel_list;
900         struct perf_record *rec = &record;
901         char errbuf[BUFSIZ];
902
903         evsel_list = perf_evlist__new();
904         if (evsel_list == NULL)
905                 return -ENOMEM;
906
907         rec->evlist = evsel_list;
908
909         argc = parse_options(argc, argv, record_options, record_usage,
910                             PARSE_OPT_STOP_AT_NON_OPTION);
911         if (!argc && target__none(&rec->opts.target))
912                 usage_with_options(record_usage, record_options);
913
914         if (nr_cgroups && !rec->opts.target.system_wide) {
915                 ui__error("cgroup monitoring only available in"
916                           " system-wide mode\n");
917                 usage_with_options(record_usage, record_options);
918         }
919
920         symbol__init();
921
922         if (symbol_conf.kptr_restrict)
923                 pr_warning(
924 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
925 "check /proc/sys/kernel/kptr_restrict.\n\n"
926 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
927 "file is not found in the buildid cache or in the vmlinux path.\n\n"
928 "Samples in kernel modules won't be resolved at all.\n\n"
929 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
930 "even with a suitable vmlinux or kallsyms file.\n\n");
931
932         if (rec->no_buildid_cache || rec->no_buildid)
933                 disable_buildid_cache();
934
935         if (evsel_list->nr_entries == 0 &&
936             perf_evlist__add_default(evsel_list) < 0) {
937                 pr_err("Not enough memory for event selector list\n");
938                 goto out_symbol_exit;
939         }
940
941         err = target__validate(&rec->opts.target);
942         if (err) {
943                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
944                 ui__warning("%s", errbuf);
945         }
946
947         err = target__parse_uid(&rec->opts.target);
948         if (err) {
949                 int saved_errno = errno;
950
951                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
952                 ui__error("%s", errbuf);
953
954                 err = -saved_errno;
955                 goto out_symbol_exit;
956         }
957
958         err = -ENOMEM;
959         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
960                 usage_with_options(record_usage, record_options);
961
962         if (perf_record_opts__config(&rec->opts)) {
963                 err = -EINVAL;
964                 goto out_free_fd;
965         }
966
967         err = __cmd_record(&record, argc, argv);
968
969         perf_evlist__munmap(evsel_list);
970         perf_evlist__close(evsel_list);
971 out_free_fd:
972         perf_evlist__delete_maps(evsel_list);
973 out_symbol_exit:
974         symbol__exit();
975         return err;
976 }