/*
 * tools/perf/builtin-record.c (from the ~andy/linux tree)
 * Snapshot viewed at commit: "perf evlist: Fix 32-bit build error"
 */
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27
28 #include <unistd.h>
29 #include <sched.h>
30 #include <sys/mman.h>
31
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Minimal fallback for the GNU on_exit(3) extension on libcs that lack it:
 * registered callbacks receive the process exit status, which we capture by
 * wrapping exit() in a macro below.
 *
 * NOTE(review): glibc runs on_exit()/atexit() handlers in reverse order of
 * registration; this fallback runs them in registration order — confirm no
 * caller in this file depends on the ordering.
 */
static int __on_exit_count = 0;	/* number of registered callbacks */
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];	/* callback table */
static void *__on_exit_args[ATEXIT_MAX];		/* matching user arguments */
static int __exitcode = 0;	/* status of the last exit() call */
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* capture the exit status so __handle_on_exit_funcs() can forward it */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called at exit() with the exit status and @arg.
 * Returns 0 on success, -ENOMEM if the table is full.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);	/* hook into atexit only once */
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit(3) trampoline: invoke the registered callbacks. */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
63
/* State for one 'perf record' run; recovered via container_of(&tool). */
struct perf_record {
	struct perf_tool	tool;		/* event-processing callbacks */
	struct perf_record_opts opts;		/* parsed command-line options */
	u64			bytes_written;	/* payload bytes written to output */
	const char		*output_name;	/* output file name (NULL => decide at runtime) */
	struct perf_evlist	*evlist;	/* events being recorded */
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used for psignal() messages */
	int			output;		/* output file descriptor */
	int			realtime_prio;	/* nonzero => switch to SCHED_FIFO at this prio */
	bool			no_buildid;	/* skip build-id post-processing */
	bool			no_buildid_cache; /* don't update the on-disk buildid cache */
	long			samples;	/* number of mmap reads that found data */
	off_t			post_processing_offset;	/* file offset where event data begins */
};
79
80 static void advance_output(struct perf_record *rec, size_t size)
81 {
82         rec->bytes_written += size;
83 }
84
85 static int write_output(struct perf_record *rec, void *buf, size_t size)
86 {
87         while (size) {
88                 int ret = write(rec->output, buf, size);
89
90                 if (ret < 0) {
91                         pr_err("failed to write perf data, error: %m\n");
92                         return -1;
93                 }
94
95                 size -= ret;
96                 buf += ret;
97
98                 rec->bytes_written += ret;
99         }
100
101         return 0;
102 }
103
104 static int process_synthesized_event(struct perf_tool *tool,
105                                      union perf_event *event,
106                                      struct perf_sample *sample __maybe_unused,
107                                      struct machine *machine __maybe_unused)
108 {
109         struct perf_record *rec = container_of(tool, struct perf_record, tool);
110         if (write_output(rec, event, event->header.size) < 0)
111                 return -1;
112
113         return 0;
114 }
115
/*
 * Drain one mmap'ed ring buffer into the output file.  The kernel advances
 * the head; we consume from our cached tail (md->prev) up to head, then
 * publish the new tail so the kernel can reuse the space.
 * Returns 0 on success (including "nothing to do"), -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data area starts one page past the control page */
	unsigned long size;
	void *buf;
	int rc = 0;

	/* nothing new since the last drain */
	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	/*
	 * If the pending region wraps past the end of the ring, write the
	 * piece up to the buffer end first, then fall through to write the
	 * part that wrapped to the front.
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* contiguous (remaining) part */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);	/* tell the kernel the data was consumed */

out:
	return rc;
}
159
/* Flags shared between the signal handler and the main record loop. */
static volatile int done;		/* ask the main loop to stop */
static volatile int signr = -1;		/* last signal received, -1 if none */
static volatile int child_finished;	/* forked workload exited (SIGCHLD seen) */

/*
 * Handler for SIGCHLD/SIGINT/SIGUSR1/SIGTERM: remember which signal fired
 * and tell the main loop to wind down.
 */
static void sig_handler(int sig)
{
	child_finished |= (sig == SIGCHLD);

	done = 1;
	signr = sig;
}
172
/*
 * on_exit() handler: make sure the forked workload is terminated and reaped,
 * then restore the default disposition of the signal that stopped us.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		/* child still running: ask it to terminate before reaping */
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	/*
	 * Normal exit, or SIGUSR1 — which __cmd_record() also treats as a
	 * quiet, successful stop — nothing more to do.
	 */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}
192
/*
 * Configure and open all counters in the evlist, apply event filters and
 * mmap the per-cpu ring buffers.  Returns 0 on success, a negative errno
 * or -1 on failure (after printing a diagnostic).
 */
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			/*
			 * Let the evsel try a degraded configuration (e.g. a
			 * different event encoding) and retry the open.
			 */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			/* capture errno before further calls can clobber it */
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			/* mlock limit exceeded: tell the user how to fix it */
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
248
249 static int process_buildids(struct perf_record *rec)
250 {
251         u64 size = lseek(rec->output, 0, SEEK_CUR);
252
253         if (size == 0)
254                 return 0;
255
256         rec->session->fd = rec->output;
257         return __perf_session__process_events(rec->session, rec->post_processing_offset,
258                                               size - rec->post_processing_offset,
259                                               size, &build_id__mark_dso_hit_ops);
260 }
261
/*
 * on_exit() handler: on a successful exit, finalize the perf.data file —
 * account the recorded data size in the header, run build-id
 * post-processing, rewrite the header at rest — then release the session
 * and evlist resources.  Skipped entirely in pipe mode (no seekable file).
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	/* don't finalize a file after a failed run */
	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
281
282 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
283 {
284         int err;
285         struct perf_tool *tool = data;
286         /*
287          *As for guest kernel when processing subcommand record&report,
288          *we arrange module mmap prior to guest kernel mmap and trigger
289          *a preload dso because default guest module symbols are loaded
290          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
291          *method is used to avoid symbol missing when the first addr is
292          *in module instead of in guest kernel.
293          */
294         err = perf_event__synthesize_modules(tool, process_synthesized_event,
295                                              machine);
296         if (err < 0)
297                 pr_err("Couldn't record guest kernel [%d]'s reference"
298                        " relocation symbol.\n", machine->pid);
299
300         /*
301          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
302          * have no _text sometimes.
303          */
304         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
305                                                  machine, "_text");
306         if (err < 0)
307                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
308                                                          machine, "_stext");
309         if (err < 0)
310                 pr_err("Couldn't record guest kernel [%d]'s reference"
311                        " relocation symbol.\n", machine->pid);
312 }
313
/*
 * Header-only synthetic event flushed after each full pass over the mmaps;
 * marks an ordering boundary between rounds of buffer reads for the
 * report-side event reordering.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
318
319 static int perf_record__mmap_read_all(struct perf_record *rec)
320 {
321         int i;
322         int rc = 0;
323
324         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
325                 if (rec->evlist->mmap[i].base) {
326                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
327                                 rc = -1;
328                                 goto out;
329                         }
330                 }
331         }
332
333         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
334                 rc = write_output(rec, &finished_round_event,
335                                   sizeof(finished_round_event));
336
337 out:
338         return rc;
339 }
340
/*
 * Core of 'perf record': set up the output file, session and counters, fork
 * the workload if a command was given, then loop draining the mmap ring
 * buffers into the output until the workload exits or a signal asks us to
 * stop.  Returns 0 on success, negative on error.  File finalization and
 * workload reaping happen in the on_exit() handlers registered here.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;		/* poll() wakeups, reported at the end */
	const bool forks = argc > 0;		/* a workload command was given */
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	/* no -o given: pipe mode if stdout is a FIFO, else default file name */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		/* rotate a pre-existing non-empty output file to <name>.old */
		else if (!stat(output_name, &st) && st.st_size) {
			char oldname[PATH_MAX];
			snprintf(oldname, sizeof(oldname), "%s.old",
				 output_name);
			unlink(oldname);
			rename(output_name, oldname);
		}
	}

	flags = O_CREAT|O_RDWR|O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    true, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* start with every header feature enabled, then strip what doesn't apply */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	/* fork the workload now (stopped) so its pid can be attached to */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, opts->pipe_output,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* everything after this offset is event data, re-read by process_buildids() */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	/* in pipe mode the header carries no attrs/tracing data: synthesize them */
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			/* err is the number of bytes the callee wrote directly */
			advance_output(rec, err);
		}
	}

	/* fall back to _stext when /proc/kallsyms has no _text */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	/* synthesize existing thread maps for attach-style targets */
	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		/* nothing was read this pass: sleep until data or a signal */
		if (hits == rec->samples) {
			if (done)
				break;
			/*
			 * NOTE(review): poll()'s return value is stored in
			 * err but never checked; the next loop iteration
			 * overwrites it.
			 */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	/* quiet exit; the on_exit() handlers still finalize the file */
	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
595
/* Helpers to build the branch_modes[] table below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }	/* table terminator */

/* Maps a -b/--branch-filter token to its PERF_SAMPLE_BRANCH_* bit. */
struct branch_mode {
	const char *name;
	int mode;
};
605
/* Recognized branch-filter tokens; scanned linearly by parse_branch_stack(). */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};
619
620 static int
621 parse_branch_stack(const struct option *opt, const char *str, int unset)
622 {
623 #define ONLY_PLM \
624         (PERF_SAMPLE_BRANCH_USER        |\
625          PERF_SAMPLE_BRANCH_KERNEL      |\
626          PERF_SAMPLE_BRANCH_HV)
627
628         uint64_t *mode = (uint64_t *)opt->value;
629         const struct branch_mode *br;
630         char *s, *os = NULL, *p;
631         int ret = -1;
632
633         if (unset)
634                 return 0;
635
636         /*
637          * cannot set it twice, -b + --branch-filter for instance
638          */
639         if (*mode)
640                 return -1;
641
642         /* str may be NULL in case no arg is passed to -b */
643         if (str) {
644                 /* because str is read-only */
645                 s = os = strdup(str);
646                 if (!s)
647                         return -1;
648
649                 for (;;) {
650                         p = strchr(s, ',');
651                         if (p)
652                                 *p = '\0';
653
654                         for (br = branch_modes; br->name; br++) {
655                                 if (!strcasecmp(s, br->name))
656                                         break;
657                         }
658                         if (!br->name) {
659                                 ui__warning("unknown branch filter %s,"
660                                             " check man page\n", s);
661                                 goto error;
662                         }
663
664                         *mode |= br->mode;
665
666                         if (!p)
667                                 break;
668
669                         s = p + 1;
670                 }
671         }
672         ret = 0;
673
674         /* default to any branch */
675         if ((*mode & ~ONLY_PLM) == 0) {
676                 *mode = PERF_SAMPLE_BRANCH_ANY;
677         }
678 error:
679         free(os);
680         return ret;
681 }
682
#ifdef HAVE_LIBUNWIND_SUPPORT
/*
 * Parse a user-supplied stack dump size: must be a pure number, is rounded
 * up to a multiple of u64, and must land in (0, USHRT_MAX rounded down].
 * On success stores the result in *_size and returns 0; otherwise prints a
 * diagnostic and returns -1.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
	char *endptr;
	unsigned long size = strtoul(str, &endptr, 0);

	if (!*endptr) {			/* the whole string was numeric */
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_LIBUNWIND_SUPPORT */
710
711 int record_parse_callchain_opt(const struct option *opt,
712                                const char *arg, int unset)
713 {
714         struct perf_record_opts *opts = opt->value;
715         char *tok, *name, *saveptr = NULL;
716         char *buf;
717         int ret = -1;
718
719         /* --no-call-graph */
720         if (unset)
721                 return 0;
722
723         /* We specified default option if none is provided. */
724         BUG_ON(!arg);
725
726         /* We need buffer that we know we can write to. */
727         buf = malloc(strlen(arg) + 1);
728         if (!buf)
729                 return -ENOMEM;
730
731         strcpy(buf, arg);
732
733         tok = strtok_r((char *)buf, ",", &saveptr);
734         name = tok ? : (char *)buf;
735
736         do {
737                 /* Framepointer style */
738                 if (!strncmp(name, "fp", sizeof("fp"))) {
739                         if (!strtok_r(NULL, ",", &saveptr)) {
740                                 opts->call_graph = CALLCHAIN_FP;
741                                 ret = 0;
742                         } else
743                                 pr_err("callchain: No more arguments "
744                                        "needed for -g fp\n");
745                         break;
746
747 #ifdef HAVE_LIBUNWIND_SUPPORT
748                 /* Dwarf style */
749                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
750                         const unsigned long default_stack_dump_size = 8192;
751
752                         ret = 0;
753                         opts->call_graph = CALLCHAIN_DWARF;
754                         opts->stack_dump_size = default_stack_dump_size;
755
756                         tok = strtok_r(NULL, ",", &saveptr);
757                         if (tok) {
758                                 unsigned long size = 0;
759
760                                 ret = get_stack_size(tok, &size);
761                                 opts->stack_dump_size = size;
762                         }
763
764                         if (!ret)
765                                 pr_debug("callchain: stack dump size %d\n",
766                                          opts->stack_dump_size);
767 #endif /* HAVE_LIBUNWIND_SUPPORT */
768                 } else {
769                         pr_err("callchain: Unknown -g option "
770                                "value: %s\n", arg);
771                         break;
772                 }
773
774         } while (0);
775
776         free(buf);
777
778         if (!ret)
779                 pr_debug("callchain: type %d\n", opts->call_graph);
780
781         return ret;
782 }
783
/* Usage strings shown by the option parser for 'perf record -h'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
789
790 /*
791  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
792  * because we need to have access to it in perf_record__exit, that is called
793  * after cmd_record() exits, but since record_options need to be accessible to
794  * builtin-script, leave it here.
795  *
 * At least we don't touch it in all the other functions here directly.
797  *
798  * Just say no to tons of global variables, sigh.
799  */
static struct perf_record record = {
	.opts = {
		/*
		 * UINT_MAX/ULLONG_MAX look like "not set by the user"
		 * sentinels resolved elsewhere — TODO confirm against
		 * perf_evlist__config().
		 */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,	/* default sampling frequency */
		.target		     = {
			.uses_mmap   = true,
		},
	},
};
811
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

/* -g help text: the "dwarf" mode is only offered when built with libunwind */
#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
819
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
/*
 * Command-line option table for 'perf record'. The OPT_* entries are
 * declarative: each binds a short/long flag to a field of the global
 * 'record' state (or to a parse callback). Order here is the order
 * shown by --help, so do not reorder casually.
 */
const struct option record_options[] = {
        /* Event and target selection */
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                    "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                            "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        /* Sampling rate / period */
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
                     "number of mmap data pages",
                     perf_evlist__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        /* Call-graph recording; help text varies with libunwind support */
        OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
                             "mode[,dump_size]", record_callchain_help,
                             &record_parse_callchain_opt, "fp"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        /* What to record per sample */
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        /* Build-id handling */
        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        /* Branch-stack sampling (hardware LBR) */
        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                     "branch any", "sample any taken branches",
                     parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_END()
};
895
896 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
897 {
898         int err = -ENOMEM;
899         struct perf_evlist *evsel_list;
900         struct perf_record *rec = &record;
901         char errbuf[BUFSIZ];
902
903         evsel_list = perf_evlist__new();
904         if (evsel_list == NULL)
905                 return -ENOMEM;
906
907         rec->evlist = evsel_list;
908
909         argc = parse_options(argc, argv, record_options, record_usage,
910                             PARSE_OPT_STOP_AT_NON_OPTION);
911         if (!argc && perf_target__none(&rec->opts.target))
912                 usage_with_options(record_usage, record_options);
913
914         if (nr_cgroups && !rec->opts.target.system_wide) {
915                 ui__error("cgroup monitoring only available in"
916                           " system-wide mode\n");
917                 usage_with_options(record_usage, record_options);
918         }
919
920         symbol__init();
921
922         if (symbol_conf.kptr_restrict)
923                 pr_warning(
924 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
925 "check /proc/sys/kernel/kptr_restrict.\n\n"
926 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
927 "file is not found in the buildid cache or in the vmlinux path.\n\n"
928 "Samples in kernel modules won't be resolved at all.\n\n"
929 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
930 "even with a suitable vmlinux or kallsyms file.\n\n");
931
932         if (rec->no_buildid_cache || rec->no_buildid)
933                 disable_buildid_cache();
934
935         if (evsel_list->nr_entries == 0 &&
936             perf_evlist__add_default(evsel_list) < 0) {
937                 pr_err("Not enough memory for event selector list\n");
938                 goto out_symbol_exit;
939         }
940
941         err = perf_target__validate(&rec->opts.target);
942         if (err) {
943                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
944                 ui__warning("%s", errbuf);
945         }
946
947         err = perf_target__parse_uid(&rec->opts.target);
948         if (err) {
949                 int saved_errno = errno;
950
951                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
952                 ui__error("%s", errbuf);
953
954                 err = -saved_errno;
955                 goto out_symbol_exit;
956         }
957
958         err = -ENOMEM;
959         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
960                 usage_with_options(record_usage, record_options);
961
962         if (rec->opts.user_interval != ULLONG_MAX)
963                 rec->opts.default_interval = rec->opts.user_interval;
964         if (rec->opts.user_freq != UINT_MAX)
965                 rec->opts.freq = rec->opts.user_freq;
966
967         /*
968          * User specified count overrides default frequency.
969          */
970         if (rec->opts.default_interval)
971                 rec->opts.freq = 0;
972         else if (rec->opts.freq) {
973                 rec->opts.default_interval = rec->opts.freq;
974         } else {
975                 ui__error("frequency and count are zero, aborting\n");
976                 err = -EINVAL;
977                 goto out_free_fd;
978         }
979
980         err = __cmd_record(&record, argc, argv);
981
982         perf_evlist__munmap(evsel_list);
983         perf_evlist__close(evsel_list);
984 out_free_fd:
985         perf_evlist__delete_maps(evsel_list);
986 out_symbol_exit:
987         symbol__exit();
988         return err;
989 }