]> Pileus Git - ~andy/linux/blob - tools/perf/builtin-record.c
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[~andy/linux] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 #include "util/data.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
/*
 * Fallback on_exit() for C libraries that do not provide one.
 *
 * Up to ATEXIT_MAX (function, argument) pairs are kept in the tables below.
 * The first successful registration hooks __handle_on_exit_funcs() into
 * atexit(); at exit time it calls every registered function, in
 * registration order, with the last status passed to exit() from this file.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Remember the exit status so that it can be handed to the handlers. */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with (exit status, @arg) at process exit.
 *
 * Returns 0 on success, -ENOMEM if the table is full or if the atexit()
 * hook that drives the table could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
        if (__on_exit_count == ATEXIT_MAX)
                return -ENOMEM;

        /*
         * Without the atexit() hook nothing registered here would ever
         * run, so report the failure instead of pretending it worked.
         */
        if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
                return -ENOMEM;

        __on_exit_funcs[__on_exit_count] = function;
        __on_exit_args[__on_exit_count++] = arg;
        return 0;
}

/* atexit() hook: run every registered handler in registration order. */
static void __handle_on_exit_funcs(void)
{
        int i;

        for (i = 0; i < __on_exit_count; i++)
                __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
64
/*
 * State of one 'perf record' run.  The perf_tool is embedded so that tool
 * callbacks can get back to the enclosing record with container_of().
 */
struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;    /* total bytes accounted by record__write() */
        struct perf_data_file   file;             /* output file: fd, path, is_pipe */
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;        /* argv[0], used as psignal() prefix */
        int                     realtime_prio;    /* SCHED_FIFO priority, 0 means unchanged */
        bool                    no_buildid;       /* skip build-id processing at exit */
        bool                    no_buildid_cache;
        long                    samples;          /* bumped once per non-empty mmap read */
};
78
79 static int record__write(struct record *rec, void *bf, size_t size)
80 {
81         if (perf_data_file__write(rec->session->file, bf, size) < 0) {
82                 pr_err("failed to write perf data, error: %m\n");
83                 return -1;
84         }
85
86         rec->bytes_written += size;
87         return 0;
88 }
89
90 static int process_synthesized_event(struct perf_tool *tool,
91                                      union perf_event *event,
92                                      struct perf_sample *sample __maybe_unused,
93                                      struct machine *machine __maybe_unused)
94 {
95         struct record *rec = container_of(tool, struct record, tool);
96         return record__write(rec, event, event->header.size);
97 }
98
99 static int record__mmap_read(struct record *rec, struct perf_mmap *md)
100 {
101         unsigned int head = perf_mmap__read_head(md);
102         unsigned int old = md->prev;
103         unsigned char *data = md->base + page_size;
104         unsigned long size;
105         void *buf;
106         int rc = 0;
107
108         if (old == head)
109                 return 0;
110
111         rec->samples++;
112
113         size = head - old;
114
115         if ((old & md->mask) + size != (head & md->mask)) {
116                 buf = &data[old & md->mask];
117                 size = md->mask + 1 - (old & md->mask);
118                 old += size;
119
120                 if (record__write(rec, buf, size) < 0) {
121                         rc = -1;
122                         goto out;
123                 }
124         }
125
126         buf = &data[old & md->mask];
127         size = head - old;
128         old += size;
129
130         if (record__write(rec, buf, size) < 0) {
131                 rc = -1;
132                 goto out;
133         }
134
135         md->prev = old;
136         perf_mmap__write_tail(md, old);
137
138 out:
139         return rc;
140 }
141
/* Flags shared between the signal handlers and the record loop. */
static volatile int done = 0;           /* set once any handled signal arrived */
static volatile int signr = -1;         /* last signal seen, -1 if none */
static volatile int child_finished = 0; /* set when SIGCHLD was received */

/*
 * Handler for SIGCHLD, SIGINT and SIGTERM: note that the child is gone (for
 * SIGCHLD only), request termination of the record loop and remember which
 * signal asked for it.
 */
static void sig_handler(int sig)
{
        const int from_child = (sig == SIGCHLD);

        if (from_child)
                child_finished = 1;

        done = 1;
        signr = sig;
}
154
155 static void record__sig_exit(int exit_status __maybe_unused, void *arg)
156 {
157         struct record *rec = arg;
158         int status;
159
160         if (rec->evlist->workload.pid > 0) {
161                 if (!child_finished)
162                         kill(rec->evlist->workload.pid, SIGTERM);
163
164                 wait(&status);
165                 if (WIFSIGNALED(status))
166                         psignal(WTERMSIG(status), rec->progname);
167         }
168
169         if (signr == -1 || signr == SIGUSR1)
170                 return;
171
172         signal(signr, SIG_DFL);
173 }
174
175 static int record__open(struct record *rec)
176 {
177         char msg[512];
178         struct perf_evsel *pos;
179         struct perf_evlist *evlist = rec->evlist;
180         struct perf_session *session = rec->session;
181         struct record_opts *opts = &rec->opts;
182         int rc = 0;
183
184         perf_evlist__config(evlist, opts);
185
186         evlist__for_each(evlist, pos) {
187 try_again:
188                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
189                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
190                                 if (verbose)
191                                         ui__warning("%s\n", msg);
192                                 goto try_again;
193                         }
194
195                         rc = -errno;
196                         perf_evsel__open_strerror(pos, &opts->target,
197                                                   errno, msg, sizeof(msg));
198                         ui__error("%s\n", msg);
199                         goto out;
200                 }
201         }
202
203         if (perf_evlist__apply_filters(evlist)) {
204                 error("failed to set filter with %d (%s)\n", errno,
205                         strerror(errno));
206                 rc = -1;
207                 goto out;
208         }
209
210         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
211                 if (errno == EPERM) {
212                         pr_err("Permission error mapping pages.\n"
213                                "Consider increasing "
214                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
215                                "or try again with a smaller value of -m/--mmap_pages.\n"
216                                "(current value: %u)\n", opts->mmap_pages);
217                         rc = -errno;
218                 } else {
219                         pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
220                         rc = -errno;
221                 }
222                 goto out;
223         }
224
225         session->evlist = evlist;
226         perf_session__set_id_hdr_size(session);
227 out:
228         return rc;
229 }
230
231 static int process_buildids(struct record *rec)
232 {
233         struct perf_data_file *file  = &rec->file;
234         struct perf_session *session = rec->session;
235         u64 start = session->header.data_offset;
236
237         u64 size = lseek(file->fd, 0, SEEK_CUR);
238         if (size == 0)
239                 return 0;
240
241         return __perf_session__process_events(session, start,
242                                               size - start,
243                                               size, &build_id__mark_dso_hit_ops);
244 }
245
246 static void record__exit(int status, void *arg)
247 {
248         struct record *rec = arg;
249         struct perf_data_file *file = &rec->file;
250
251         if (status != 0)
252                 return;
253
254         if (!file->is_pipe) {
255                 rec->session->header.data_size += rec->bytes_written;
256
257                 if (!rec->no_buildid)
258                         process_buildids(rec);
259                 perf_session__write_header(rec->session, rec->evlist,
260                                            file->fd, true);
261                 perf_session__delete(rec->session);
262                 perf_evlist__delete(rec->evlist);
263                 symbol__exit();
264         }
265 }
266
267 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
268 {
269         int err;
270         struct perf_tool *tool = data;
271         /*
272          *As for guest kernel when processing subcommand record&report,
273          *we arrange module mmap prior to guest kernel mmap and trigger
274          *a preload dso because default guest module symbols are loaded
275          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
276          *method is used to avoid symbol missing when the first addr is
277          *in module instead of in guest kernel.
278          */
279         err = perf_event__synthesize_modules(tool, process_synthesized_event,
280                                              machine);
281         if (err < 0)
282                 pr_err("Couldn't record guest kernel [%d]'s reference"
283                        " relocation symbol.\n", machine->pid);
284
285         /*
286          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
287          * have no _text sometimes.
288          */
289         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
290                                                  machine);
291         if (err < 0)
292                 pr_err("Couldn't record guest kernel [%d]'s reference"
293                        " relocation symbol.\n", machine->pid);
294 }
295
/*
 * Header-only PERF_RECORD_FINISHED_ROUND event; record__mmap_read_all()
 * appends it after a pass over the mmaps when tracing data is enabled.
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
300
301 static int record__mmap_read_all(struct record *rec)
302 {
303         int i;
304         int rc = 0;
305
306         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
307                 if (rec->evlist->mmap[i].base) {
308                         if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
309                                 rc = -1;
310                                 goto out;
311                         }
312                 }
313         }
314
315         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
316                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
317
318 out:
319         return rc;
320 }
321
322 static void record__init_features(struct record *rec)
323 {
324         struct perf_session *session = rec->session;
325         int feat;
326
327         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
328                 perf_header__set_feat(&session->header, feat);
329
330         if (rec->no_buildid)
331                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
332
333         if (!have_tracepoints(&rec->evlist->entries))
334                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
335
336         if (!rec->opts.branch_stack)
337                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
338 }
339
340 static volatile int workload_exec_errno;
341
342 /*
343  * perf_evlist__prepare_workload will send a SIGUSR1
344  * if the fork fails, since we asked by setting its
345  * want_signal to true.
346  */
347 static void workload_exec_failed_signal(int signo, siginfo_t *info,
348                                         void *ucontext __maybe_unused)
349 {
350         workload_exec_errno = info->si_value.sival_int;
351         done = 1;
352         signr = signo;
353         child_finished = 1;
354 }
355
356 static int __cmd_record(struct record *rec, int argc, const char **argv)
357 {
358         int err;
359         unsigned long waking = 0;
360         const bool forks = argc > 0;
361         struct machine *machine;
362         struct perf_tool *tool = &rec->tool;
363         struct record_opts *opts = &rec->opts;
364         struct perf_data_file *file = &rec->file;
365         struct perf_session *session;
366         bool disabled = false;
367
368         rec->progname = argv[0];
369
370         on_exit(record__sig_exit, rec);
371         signal(SIGCHLD, sig_handler);
372         signal(SIGINT, sig_handler);
373         signal(SIGTERM, sig_handler);
374
375         session = perf_session__new(file, false, NULL);
376         if (session == NULL) {
377                 pr_err("Not enough memory for reading perf file header\n");
378                 return -1;
379         }
380
381         rec->session = session;
382
383         record__init_features(rec);
384
385         if (forks) {
386                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
387                                                     argv, file->is_pipe,
388                                                     workload_exec_failed_signal);
389                 if (err < 0) {
390                         pr_err("Couldn't run the workload!\n");
391                         goto out_delete_session;
392                 }
393         }
394
395         if (record__open(rec) != 0) {
396                 err = -1;
397                 goto out_delete_session;
398         }
399
400         if (!rec->evlist->nr_groups)
401                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
402
403         /*
404          * perf_session__delete(session) will be called at record__exit()
405          */
406         on_exit(record__exit, rec);
407
408         if (file->is_pipe) {
409                 err = perf_header__write_pipe(file->fd);
410                 if (err < 0)
411                         goto out_delete_session;
412         } else {
413                 err = perf_session__write_header(session, rec->evlist,
414                                                  file->fd, false);
415                 if (err < 0)
416                         goto out_delete_session;
417         }
418
419         if (!rec->no_buildid
420             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
421                 pr_err("Couldn't generate buildids. "
422                        "Use --no-buildid to profile anyway.\n");
423                 err = -1;
424                 goto out_delete_session;
425         }
426
427         machine = &session->machines.host;
428
429         if (file->is_pipe) {
430                 err = perf_event__synthesize_attrs(tool, session,
431                                                    process_synthesized_event);
432                 if (err < 0) {
433                         pr_err("Couldn't synthesize attrs.\n");
434                         goto out_delete_session;
435                 }
436
437                 if (have_tracepoints(&rec->evlist->entries)) {
438                         /*
439                          * FIXME err <= 0 here actually means that
440                          * there were no tracepoints so its not really
441                          * an error, just that we don't need to
442                          * synthesize anything.  We really have to
443                          * return this more properly and also
444                          * propagate errors that now are calling die()
445                          */
446                         err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
447                                                                   process_synthesized_event);
448                         if (err <= 0) {
449                                 pr_err("Couldn't record tracing data.\n");
450                                 goto out_delete_session;
451                         }
452                         rec->bytes_written += err;
453                 }
454         }
455
456         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
457                                                  machine);
458         if (err < 0)
459                 pr_err("Couldn't record kernel reference relocation symbol\n"
460                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
461                        "Check /proc/kallsyms permission or run as root.\n");
462
463         err = perf_event__synthesize_modules(tool, process_synthesized_event,
464                                              machine);
465         if (err < 0)
466                 pr_err("Couldn't record kernel module information.\n"
467                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
468                        "Check /proc/modules permission or run as root.\n");
469
470         if (perf_guest) {
471                 machines__process_guests(&session->machines,
472                                          perf_event__synthesize_guest_os, tool);
473         }
474
475         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
476                                             process_synthesized_event, opts->sample_address);
477         if (err != 0)
478                 goto out_delete_session;
479
480         if (rec->realtime_prio) {
481                 struct sched_param param;
482
483                 param.sched_priority = rec->realtime_prio;
484                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
485                         pr_err("Could not set realtime priority.\n");
486                         err = -1;
487                         goto out_delete_session;
488                 }
489         }
490
491         /*
492          * When perf is starting the traced process, all the events
493          * (apart from group members) have enable_on_exec=1 set,
494          * so don't spoil it by prematurely enabling them.
495          */
496         if (!target__none(&opts->target) && !opts->initial_delay)
497                 perf_evlist__enable(rec->evlist);
498
499         /*
500          * Let the child rip
501          */
502         if (forks)
503                 perf_evlist__start_workload(rec->evlist);
504
505         if (opts->initial_delay) {
506                 usleep(opts->initial_delay * 1000);
507                 perf_evlist__enable(rec->evlist);
508         }
509
510         for (;;) {
511                 int hits = rec->samples;
512
513                 if (record__mmap_read_all(rec) < 0) {
514                         err = -1;
515                         goto out_delete_session;
516                 }
517
518                 if (hits == rec->samples) {
519                         if (done)
520                                 break;
521                         err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
522                         waking++;
523                 }
524
525                 /*
526                  * When perf is starting the traced process, at the end events
527                  * die with the process and we wait for that. Thus no need to
528                  * disable events in this case.
529                  */
530                 if (done && !disabled && !target__none(&opts->target)) {
531                         perf_evlist__disable(rec->evlist);
532                         disabled = true;
533                 }
534         }
535
536         if (forks && workload_exec_errno) {
537                 char msg[512];
538                 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
539                 pr_err("Workload failed: %s\n", emsg);
540                 err = -1;
541                 goto out_delete_session;
542         }
543
544         if (quiet || signr == SIGUSR1)
545                 return 0;
546
547         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
548
549         /*
550          * Approximate RIP event size: 24 bytes.
551          */
552         fprintf(stderr,
553                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
554                 (double)rec->bytes_written / 1024.0 / 1024.0,
555                 file->path,
556                 rec->bytes_written / 24);
557
558         return 0;
559
560 out_delete_session:
561         perf_session__delete(session);
562         return err;
563 }
564
/* Build one branch_modes[] entry mapping filter name @n to mode bits @m. */
#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

/* Sentinel entry: a NULL name terminates branch_modes[]. */
#define BRANCH_END { .name = NULL }

/* One branch filter keyword and the PERF_SAMPLE_BRANCH_* bits it selects. */
struct branch_mode {
        const char *name;
        int mode;
};

/* Keywords accepted by parse_branch_stack(), matched case-insensitively. */
static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
        BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
        BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
        BRANCH_END
};
588
589 static int
590 parse_branch_stack(const struct option *opt, const char *str, int unset)
591 {
592 #define ONLY_PLM \
593         (PERF_SAMPLE_BRANCH_USER        |\
594          PERF_SAMPLE_BRANCH_KERNEL      |\
595          PERF_SAMPLE_BRANCH_HV)
596
597         uint64_t *mode = (uint64_t *)opt->value;
598         const struct branch_mode *br;
599         char *s, *os = NULL, *p;
600         int ret = -1;
601
602         if (unset)
603                 return 0;
604
605         /*
606          * cannot set it twice, -b + --branch-filter for instance
607          */
608         if (*mode)
609                 return -1;
610
611         /* str may be NULL in case no arg is passed to -b */
612         if (str) {
613                 /* because str is read-only */
614                 s = os = strdup(str);
615                 if (!s)
616                         return -1;
617
618                 for (;;) {
619                         p = strchr(s, ',');
620                         if (p)
621                                 *p = '\0';
622
623                         for (br = branch_modes; br->name; br++) {
624                                 if (!strcasecmp(s, br->name))
625                                         break;
626                         }
627                         if (!br->name) {
628                                 ui__warning("unknown branch filter %s,"
629                                             " check man page\n", s);
630                                 goto error;
631                         }
632
633                         *mode |= br->mode;
634
635                         if (!p)
636                                 break;
637
638                         s = p + 1;
639                 }
640         }
641         ret = 0;
642
643         /* default to any branch */
644         if ((*mode & ~ONLY_PLM) == 0) {
645                 *mode = PERF_SAMPLE_BRANCH_ANY;
646         }
647 error:
648         free(os);
649         return ret;
650 }
651
652 #ifdef HAVE_LIBUNWIND_SUPPORT
653 static int get_stack_size(char *str, unsigned long *_size)
654 {
655         char *endptr;
656         unsigned long size;
657         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
658
659         size = strtoul(str, &endptr, 0);
660
661         do {
662                 if (*endptr)
663                         break;
664
665                 size = round_up(size, sizeof(u64));
666                 if (!size || size > max_size)
667                         break;
668
669                 *_size = size;
670                 return 0;
671
672         } while (0);
673
674         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
675                max_size, str);
676         return -1;
677 }
678 #endif /* HAVE_LIBUNWIND_SUPPORT */
679
680 int record_parse_callchain(const char *arg, struct record_opts *opts)
681 {
682         char *tok, *name, *saveptr = NULL;
683         char *buf;
684         int ret = -1;
685
686         /* We need buffer that we know we can write to. */
687         buf = malloc(strlen(arg) + 1);
688         if (!buf)
689                 return -ENOMEM;
690
691         strcpy(buf, arg);
692
693         tok = strtok_r((char *)buf, ",", &saveptr);
694         name = tok ? : (char *)buf;
695
696         do {
697                 /* Framepointer style */
698                 if (!strncmp(name, "fp", sizeof("fp"))) {
699                         if (!strtok_r(NULL, ",", &saveptr)) {
700                                 opts->call_graph = CALLCHAIN_FP;
701                                 ret = 0;
702                         } else
703                                 pr_err("callchain: No more arguments "
704                                        "needed for -g fp\n");
705                         break;
706
707 #ifdef HAVE_LIBUNWIND_SUPPORT
708                 /* Dwarf style */
709                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
710                         const unsigned long default_stack_dump_size = 8192;
711
712                         ret = 0;
713                         opts->call_graph = CALLCHAIN_DWARF;
714                         opts->stack_dump_size = default_stack_dump_size;
715
716                         tok = strtok_r(NULL, ",", &saveptr);
717                         if (tok) {
718                                 unsigned long size = 0;
719
720                                 ret = get_stack_size(tok, &size);
721                                 opts->stack_dump_size = size;
722                         }
723 #endif /* HAVE_LIBUNWIND_SUPPORT */
724                 } else {
725                         pr_err("callchain: Unknown --call-graph option "
726                                "value: %s\n", arg);
727                         break;
728                 }
729
730         } while (0);
731
732         free(buf);
733         return ret;
734 }
735
736 static void callchain_debug(struct record_opts *opts)
737 {
738         pr_debug("callchain: type %d\n", opts->call_graph);
739
740         if (opts->call_graph == CALLCHAIN_DWARF)
741                 pr_debug("callchain: stack dump size %d\n",
742                          opts->stack_dump_size);
743 }
744
745 int record_parse_callchain_opt(const struct option *opt,
746                                const char *arg,
747                                int unset)
748 {
749         struct record_opts *opts = opt->value;
750         int ret;
751
752         /* --no-call-graph */
753         if (unset) {
754                 opts->call_graph = CALLCHAIN_NONE;
755                 pr_debug("callchain: disabled\n");
756                 return 0;
757         }
758
759         ret = record_parse_callchain(arg, opts);
760         if (!ret)
761                 callchain_debug(opts);
762
763         return ret;
764 }
765
766 int record_callchain_opt(const struct option *opt,
767                          const char *arg __maybe_unused,
768                          int unset __maybe_unused)
769 {
770         struct record_opts *opts = opt->value;
771
772         if (opts->call_graph == CALLCHAIN_NONE)
773                 opts->call_graph = CALLCHAIN_FP;
774
775         callchain_debug(opts);
776         return 0;
777 }
778
/* NULL-terminated list of usage lines for 'perf record'. */
static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
784
785 /*
786  * XXX Ideally would be local to cmd_record() and passed to a record__new
787  * because we need to have access to it in record__exit, that is called
788  * after cmd_record() exits, but since record_options need to be accessible to
789  * builtin-script, leave it here.
790  *
 * At least we don't touch it in all the other functions here directly.
792  *
793  * Just say no to tons of global variables, sigh.
794  */
795 static struct record record = {
796         .opts = {
797                 .mmap_pages          = UINT_MAX,
798                 .user_freq           = UINT_MAX,
799                 .user_interval       = ULLONG_MAX,
800                 .freq                = 4000,
801                 .target              = {
802                         .uses_mmap   = true,
803                         .default_per_cpu = true,
804                 },
805         },
806 };
807
/* Common prefix of the --call-graph help text; the supported modes follow. */
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* "dwarf" is only advertised when built with HAVE_LIBUNWIND_SUPPORT. */
#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif
815
816 /*
817  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
818  * with it and switch to use the library functions in perf_evlist that came
819  * from builtin-record.c, i.e. use record_opts,
820  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
821  * using pipes, etc.
822  */
823 const struct option record_options[] = {
824         OPT_CALLBACK('e', "event", &record.evlist, "event",
825                      "event selector. use 'perf list' to list available events",
826                      parse_events_option),
827         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
828                      "event filter", parse_filter),
829         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
830                     "record events on existing process id"),
831         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
832                     "record events on existing thread id"),
833         OPT_INTEGER('r', "realtime", &record.realtime_prio,
834                     "collect data with this RT SCHED_FIFO priority"),
835         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
836                     "collect data without buffering"),
837         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
838                     "collect raw sample records from all opened counters"),
839         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
840                             "system-wide collection from all CPUs"),
841         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
842                     "list of cpus to monitor"),
843         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
844         OPT_STRING('o', "output", &record.file.path, "file",
845                     "output file name"),
846         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
847                         &record.opts.no_inherit_set,
848                         "child tasks do not inherit counters"),
849         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
850         OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
851                      "number of mmap data pages",
852                      perf_evlist__parse_mmap_pages),
853         OPT_BOOLEAN(0, "group", &record.opts.group,
854                     "put the counters into a counter group"),
855         OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
856                            NULL, "enables call-graph recording" ,
857                            &record_callchain_opt),
858         OPT_CALLBACK(0, "call-graph", &record.opts,
859                      "mode[,dump_size]", record_callchain_help,
860                      &record_parse_callchain_opt),
861         OPT_INCR('v', "verbose", &verbose,
862                     "be more verbose (show counter open errors, etc)"),
863         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
864         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
865                     "per thread counts"),
866         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
867                     "Sample addresses"),
868         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
869         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
870         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
871                     "don't sample"),
872         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
873                     "do not update the buildid cache"),
874         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
875                     "do not collect buildids in perf.data"),
876         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
877                      "monitor event in cgroup name only",
878                      parse_cgroups),
879         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
880                   "ms to wait before starting measurement after program start"),
881         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
882                    "user to profile"),
883
884         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
885                      "branch any", "sample any taken branches",
886                      parse_branch_stack),
887
888         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
889                      "branch filter mask", "branch stack filter modes",
890                      parse_branch_stack),
891         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
892                     "sample by weight (on special events only)"),
893         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
894                     "sample transaction flags (special events only)"),
895         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
896                     "use per-thread mmaps"),
897         OPT_END()
898 };
899
900 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
901 {
902         int err = -ENOMEM;
903         struct record *rec = &record;
904         char errbuf[BUFSIZ];
905
906         rec->evlist = perf_evlist__new();
907         if (rec->evlist == NULL)
908                 return -ENOMEM;
909
910         argc = parse_options(argc, argv, record_options, record_usage,
911                             PARSE_OPT_STOP_AT_NON_OPTION);
912         if (!argc && target__none(&rec->opts.target))
913                 usage_with_options(record_usage, record_options);
914
915         if (nr_cgroups && !rec->opts.target.system_wide) {
916                 ui__error("cgroup monitoring only available in"
917                           " system-wide mode\n");
918                 usage_with_options(record_usage, record_options);
919         }
920
921         symbol__init();
922
923         if (symbol_conf.kptr_restrict)
924                 pr_warning(
925 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
926 "check /proc/sys/kernel/kptr_restrict.\n\n"
927 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
928 "file is not found in the buildid cache or in the vmlinux path.\n\n"
929 "Samples in kernel modules won't be resolved at all.\n\n"
930 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
931 "even with a suitable vmlinux or kallsyms file.\n\n");
932
933         if (rec->no_buildid_cache || rec->no_buildid)
934                 disable_buildid_cache();
935
936         if (rec->evlist->nr_entries == 0 &&
937             perf_evlist__add_default(rec->evlist) < 0) {
938                 pr_err("Not enough memory for event selector list\n");
939                 goto out_symbol_exit;
940         }
941
942         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
943                 rec->opts.no_inherit = true;
944
945         err = target__validate(&rec->opts.target);
946         if (err) {
947                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
948                 ui__warning("%s", errbuf);
949         }
950
951         err = target__parse_uid(&rec->opts.target);
952         if (err) {
953                 int saved_errno = errno;
954
955                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
956                 ui__error("%s", errbuf);
957
958                 err = -saved_errno;
959                 goto out_symbol_exit;
960         }
961
962         err = -ENOMEM;
963         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
964                 usage_with_options(record_usage, record_options);
965
966         if (record_opts__config(&rec->opts)) {
967                 err = -EINVAL;
968                 goto out_symbol_exit;
969         }
970
971         err = __cmd_record(&record, argc, argv);
972 out_symbol_exit:
973         symbol__exit();
974         return err;
975 }