/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

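/*
 * Fallback on_exit() implementation for libcs that lack it (e.g. Android's
 * bionic): exit() is wrapped below so the status is captured in __exitcode
 * and handed to each registered handler from an atexit() hook.
 */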
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
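/*
 * The parentheses around (exit) keep this macro from expanding recursively;
 * the real exit() is called with the status after it has been saved for the
 * handlers above.
 */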
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
        if (__on_exit_count == ATEXIT_MAX)
                return -ENOMEM;
        else if (__on_exit_count == 0)
                atexit(__handle_on_exit_funcs);
        __on_exit_funcs[__on_exit_count] = function;
        __on_exit_args[__on_exit_count++] = arg;
        return 0;
}

static void __handle_on_exit_funcs(void)
{
        int i;
        for (i = 0; i < __on_exit_count; i++)
                __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_cache;
        long                    samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;
        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

static int record__mmap_read(struct record *rec, struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (old == head)
                return 0;

        rec->samples++;

        size = head - old;

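        /*
         * If [old, head) wraps past the end of the ring buffer, write
         * the chunk up to the buffer's end first, then fall through to
         * write the remainder from the buffer's start.
         */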
        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = old;
        perf_mmap__write_tail(md, old);

out:
        return rc;
}

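/* Set from signal handlers, hence volatile. */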
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}

static void record__sig_exit(int exit_status __maybe_unused, void *arg)
{
        struct record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
}

static int record__open(struct record *rec)
{
        char msg[512];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        int rc = 0;

        perf_evlist__config(evlist, opts);

        list_for_each_entry(pos, &evlist->entries, node) {
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                rc = -1;
                goto out;
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u)\n", opts->mmap_pages);
                        rc = -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
                        rc = -errno;
                }
                goto out;
        }

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        struct perf_session *session = rec->session;
        u64 start = session->header.data_offset;

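        /*
         * The file offset now sits at the end of everything written so
         * far, so the events to scan for build-ids span [start, size).
         */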
        u64 size = lseek(file->fd, 0, SEEK_CUR);
        if (size == 0)
                return 0;

        return __perf_session__process_events(session, start,
                                              size - start,
                                              size, &build_id__mark_dso_hit_ops);
}

static void record__exit(int status, void *arg)
{
        struct record *rec = arg;
        struct perf_data_file *file = &rec->file;

        if (status != 0)
                return;

        if (!file->is_pipe) {
                rec->session->header.data_size += rec->bytes_written;

                if (!rec->no_buildid)
                        process_buildids(rec);
                perf_session__write_header(rec->session, rec->evlist,
                                           file->fd, true);
                perf_session__delete(rec->session);
                perf_evlist__delete(rec->evlist);
                symbol__exit();
        }
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel: when processing the record & report
         * subcommands, we arrange the module mmaps before the guest
         * kernel mmap and trigger a dso preload, because by default
         * guest module symbols are loaded from guest kallsyms instead
         * of /lib/modules/XXX/XXX. This avoids missing symbols when the
         * first address falls in a module rather than in the guest
         * kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

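/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic marker written after each pass
 * over the mmaps (when tracing data is present); it tells perf report that
 * everything seen so far can be safely ordered and flushed.
 */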
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                if (rec->evlist->mmap[i].base) {
                        if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
                                rc = -1;
                                goto out;
                        }
                }
        }

        if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
        return rc;
}

static void record__init_features(struct record *rec)
{
        struct perf_evlist *evsel_list = rec->evlist;
        struct perf_session *session = rec->session;
        int feat;

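        /* Start with every feature set, then clear the ones that do not apply. */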
        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&evsel_list->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the workload
 * fails to exec, since we asked for that by passing this handler
 * to it.
 */
static void workload_exec_failed_signal(int signo, siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
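        /* The failing child queues its errno as the signal's sival_int. */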
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        signr = signo;
        child_finished = 1;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false;

        rec->progname = argv[0];

        on_exit(record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);

        session = perf_session__new(file, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_delete_session;
        }

        if (!evsel_list->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        /*
         * perf_session__delete(session) will be called at record__exit()
         */
        on_exit(record__exit, rec);

        if (file->is_pipe) {
                err = perf_header__write_pipe(file->fd);
                if (err < 0)
                        goto out_delete_session;
        } else {
                err = perf_session__write_header(session, evsel_list,
                                                 file->fd, false);
                if (err < 0)
                        goto out_delete_session;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_delete_session;
        }

        machine = &session->machines.host;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out_delete_session;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME: err <= 0 here actually means that there
                         * were no tracepoints, so it's not really an
                         * error, just that we don't need to synthesize
                         * anything. We really have to return this more
                         * properly and also propagate errors that now
                         * are calling die().
                         */
                        err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out_delete_session;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, evsel_list->threads,
                                            process_synthesized_event, opts->sample_address);
        if (err != 0)
                goto out_delete_session;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_delete_session;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target))
                perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

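        /*
         * Main loop: drain all mmaps, then sleep in poll() until the
         * kernel has more data for us. A pass that found no new samples
         * while 'done' is set means we can finally break out.
         */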
        for (;;) {
                int hits = rec->samples;

                if (record__mmap_read_all(rec) < 0) {
                        err = -1;
                        goto out_delete_session;
                }

                if (hits == rec->samples) {
                        if (done)
                                break;
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        perf_evlist__disable(evsel_list);
                        disabled = true;
                }
        }

        if (forks && workload_exec_errno) {
                char msg[512];
                const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_delete_session;
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                file->path,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}

#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
        const char *name;
        int mode;
};

static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
        BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
        BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
        BRANCH_END
};

static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
        (PERF_SAMPLE_BRANCH_USER        |\
         PERF_SAMPLE_BRANCH_KERNEL      |\
         PERF_SAMPLE_BRANCH_HV)

        uint64_t *mode = (uint64_t *)opt->value;
        const struct branch_mode *br;
        char *s, *os = NULL, *p;
        int ret = -1;

        if (unset)
                return 0;

        /*
         * cannot set it twice, -b + --branch-filter for instance
         */
        if (*mode)
                return -1;

        /* str may be NULL in case no arg is passed to -b */
        if (str) {
                /* because str is read-only */
                s = os = strdup(str);
                if (!s)
                        return -1;

                for (;;) {
                        p = strchr(s, ',');
                        if (p)
                                *p = '\0';

                        for (br = branch_modes; br->name; br++) {
                                if (!strcasecmp(s, br->name))
                                        break;
                        }
                        if (!br->name) {
                                ui__warning("unknown branch filter %s,"
                                            " check man page\n", s);
                                goto error;
                        }

                        *mode |= br->mode;

                        if (!p)
                                break;

                        s = p + 1;
                }
        }
        ret = 0;

        /* default to any branch */
        if ((*mode & ~ONLY_PLM) == 0) {
                *mode = PERF_SAMPLE_BRANCH_ANY;
        }
error:
        free(os);
        return ret;
}

#ifdef HAVE_LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
        char *endptr;
        unsigned long size;
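        /*
         * A sample must fit in a single event record, whose size field
         * (perf_event_header.size) is a u16, so cap the user stack dump
         * at USHRT_MAX and keep it u64-aligned.
         */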
        unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

        size = strtoul(str, &endptr, 0);

        do {
                if (*endptr)
                        break;

                size = round_up(size, sizeof(u64));
                if (!size || size > max_size)
                        break;

                *_size = size;
                return 0;

        } while (0);

        pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
               max_size, str);
        return -1;
}
#endif /* HAVE_LIBUNWIND_SUPPORT */

int record_parse_callchain(const char *arg, struct record_opts *opts)
{
        char *tok, *name, *saveptr = NULL;
        char *buf;
        int ret = -1;

        /* We need a buffer that we know we can write to. */
        buf = malloc(strlen(arg) + 1);
        if (!buf)
                return -ENOMEM;

        strcpy(buf, arg);

        tok = strtok_r((char *)buf, ",", &saveptr);
        name = tok ? : (char *)buf;

        do {
                /* Framepointer style */
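                /*
                 * sizeof("fp") includes the NUL, so this matches "fp"
                 * exactly rather than just its prefix.
                 */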
                if (!strncmp(name, "fp", sizeof("fp"))) {
                        if (!strtok_r(NULL, ",", &saveptr)) {
                                opts->call_graph = CALLCHAIN_FP;
                                ret = 0;
                        } else
                                pr_err("callchain: No more arguments "
                                       "needed for -g fp\n");
                        break;

#ifdef HAVE_LIBUNWIND_SUPPORT
                /* Dwarf style */
                } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
                        const unsigned long default_stack_dump_size = 8192;

                        ret = 0;
                        opts->call_graph = CALLCHAIN_DWARF;
                        opts->stack_dump_size = default_stack_dump_size;

                        tok = strtok_r(NULL, ",", &saveptr);
                        if (tok) {
                                unsigned long size = 0;

                                ret = get_stack_size(tok, &size);
                                opts->stack_dump_size = size;
                        }
#endif /* HAVE_LIBUNWIND_SUPPORT */
                } else {
                        pr_err("callchain: Unknown --call-graph option "
                               "value: %s\n", arg);
                        break;
                }

        } while (0);

        free(buf);
        return ret;
}

static void callchain_debug(struct record_opts *opts)
{
        pr_debug("callchain: type %d\n", opts->call_graph);

        if (opts->call_graph == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        struct record_opts *opts = opt->value;
        int ret;

        /* --no-call-graph */
        if (unset) {
                opts->call_graph = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = record_parse_callchain(arg, opts);
        if (!ret)
                callchain_debug(opts);

        return ret;
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;

        if (opts->call_graph == CALLCHAIN_NONE)
                opts->call_graph = CALLCHAIN_FP;

        callchain_debug(opts);
        return 0;
}

static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new, because we need access to it in record__exit(), which is
 * called after cmd_record() exits; but since record_options needs to be
 * accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
        .opts = {
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
        },
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                    "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                            "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.file.path, "file",
                    "output file name"),
        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
                        &record.opts.no_inherit_set,
                        "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
                     "number of mmap data pages",
                     perf_evlist__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
                           NULL, "enable call-graph recording",
                           &record_callchain_opt),
        OPT_CALLBACK(0, "call-graph", &record.opts,
                     "mode[,dump_size]", record_callchain_help,
                     &record_parse_callchain_opt),
864         OPT_INCR('v', "verbose", &verbose,
865                     "be more verbose (show counter open errors, etc)"),
866         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
867         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
868                     "per thread counts"),
869         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
870                     "Sample addresses"),
871         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
872         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
873         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
874                     "don't sample"),
875         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
876                     "do not update the buildid cache"),
877         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
878                     "do not collect buildids in perf.data"),
879         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
880                      "monitor event in cgroup name only",
881                      parse_cgroups),
882         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
883                    "user to profile"),
884
885         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
886                      "branch any", "sample any taken branches",
887                      parse_branch_stack),
888
889         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
890                      "branch filter mask", "branch stack filter modes",
891                      parse_branch_stack),
892         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
893                     "sample by weight (on special events only)"),
894         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
895                     "sample transaction flags (special events only)"),
896         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
897                     "use per-thread mmaps"),
898         OPT_END()
899 };
900
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
        int err = -ENOMEM;
        struct perf_evlist *evsel_list;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

        evsel_list = perf_evlist__new();
        if (evsel_list == NULL)
                return -ENOMEM;

        rec->evlist = evsel_list;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && target__none(&rec->opts.target))
                usage_with_options(record_usage, record_options);

        if (nr_cgroups && !rec->opts.target.system_wide) {
                ui__error("cgroup monitoring only available in"
                          " system-wide mode\n");
                usage_with_options(record_usage, record_options);
        }

        symbol__init();

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid)
                disable_buildid_cache();

        if (evsel_list->nr_entries == 0 &&
            perf_evlist__add_default(evsel_list) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out_symbol_exit;
        }

        err = -ENOMEM;
        if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out_free_fd;
        }

        err = __cmd_record(&record, argc, argv);

        perf_evlist__munmap(evsel_list);
        perf_evlist__close(evsel_list);
out_free_fd:
        perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
        symbol__exit();
        return err;
}