1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
43 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
51 return *(u##bits *)(sample->raw_data + field->offset); \
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
62 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63 return bswap_##bits(value);\
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
70 static int tp_field__init_uint(struct tp_field *field,
71 struct format_field *format_field,
74 field->offset = format_field->offset;
76 switch (format_field->size) {
78 field->integer = tp_field__u8;
81 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
84 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
87 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
98 return sample->raw_data + field->offset;
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
103 field->offset = format_field->offset;
104 field->pointer = tp_field__ptr;
111 struct tp_field args, ret;
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116 struct tp_field *field,
119 struct format_field *format_field = perf_evsel__field(evsel, name);
121 if (format_field == NULL)
124 return tp_field__init_uint(field, format_field, evsel->needs_swap);
127 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
128 ({ struct syscall_tp *sc = evsel->priv;\
129 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132 struct tp_field *field,
135 struct format_field *format_field = perf_evsel__field(evsel, name);
137 if (format_field == NULL)
140 return tp_field__init_ptr(field, format_field);
143 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
144 ({ struct syscall_tp *sc = evsel->priv;\
145 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
150 perf_evsel__delete(evsel);
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
155 evsel->priv = malloc(sizeof(struct syscall_tp));
156 if (evsel->priv != NULL) {
157 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
160 evsel->handler = handler;
171 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
173 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
175 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
177 evsel = perf_evsel__newtp("syscalls", direction);
180 if (perf_evsel__init_syscall_tp(evsel, handler))
187 perf_evsel__delete_priv(evsel);
191 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
192 ({ struct syscall_tp *fields = evsel->priv; \
193 fields->name.integer(&fields->name, sample); })
195 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.pointer(&fields->name, sample); })
199 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
200 void *sys_enter_handler,
201 void *sys_exit_handler)
204 struct perf_evsel *sys_enter, *sys_exit;
206 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
207 if (sys_enter == NULL)
210 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
211 goto out_delete_sys_enter;
213 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
214 if (sys_exit == NULL)
215 goto out_delete_sys_enter;
217 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
218 goto out_delete_sys_exit;
220 perf_evlist__add(evlist, sys_enter);
221 perf_evlist__add(evlist, sys_exit);
228 perf_evsel__delete_priv(sys_exit);
229 out_delete_sys_enter:
230 perf_evsel__delete_priv(sys_enter);
237 struct thread *thread;
247 const char **entries;
250 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
251 .nr_entries = ARRAY_SIZE(array), \
255 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
257 .nr_entries = ARRAY_SIZE(array), \
261 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
263 struct syscall_arg *arg)
265 struct strarray *sa = arg->parm;
266 int idx = arg->val - sa->offset;
268 if (idx < 0 || idx >= sa->nr_entries)
269 return scnprintf(bf, size, intfmt, arg->val);
271 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table formatter, decimal flavour: values the table cannot name are
 * printed with "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
280 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * String-table formatter, hexadecimal flavour: values the table cannot name
 * are printed with "%#x".
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
288 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
290 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
291 struct syscall_arg *arg);
293 #define SCA_FD syscall_arg__scnprintf_fd
295 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
296 struct syscall_arg *arg)
301 return scnprintf(bf, size, "CWD");
303 return syscall_arg__scnprintf_fd(bf, size, arg);
306 #define SCA_FDAT syscall_arg__scnprintf_fd_at
308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
309 struct syscall_arg *arg);
311 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
313 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
314 struct syscall_arg *arg)
316 return scnprintf(bf, size, "%#lx", arg->val);
319 #define SCA_HEX syscall_arg__scnprintf_hex
321 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
322 struct syscall_arg *arg)
324 int printed = 0, prot = arg->val;
326 if (prot == PROT_NONE)
327 return scnprintf(bf, size, "NONE");
328 #define P_MMAP_PROT(n) \
329 if (prot & PROT_##n) { \
330 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
340 P_MMAP_PROT(GROWSDOWN);
341 P_MMAP_PROT(GROWSUP);
345 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
350 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
352 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
353 struct syscall_arg *arg)
355 int printed = 0, flags = arg->val;
357 #define P_MMAP_FLAG(n) \
358 if (flags & MAP_##n) { \
359 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
364 P_MMAP_FLAG(PRIVATE);
368 P_MMAP_FLAG(ANONYMOUS);
369 P_MMAP_FLAG(DENYWRITE);
370 P_MMAP_FLAG(EXECUTABLE);
373 P_MMAP_FLAG(GROWSDOWN);
375 P_MMAP_FLAG(HUGETLB);
378 P_MMAP_FLAG(NONBLOCK);
379 P_MMAP_FLAG(NORESERVE);
380 P_MMAP_FLAG(POPULATE);
382 #ifdef MAP_UNINITIALIZED
383 P_MMAP_FLAG(UNINITIALIZED);
388 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
393 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
395 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
396 struct syscall_arg *arg)
398 int behavior = arg->val;
401 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
404 P_MADV_BHV(SEQUENTIAL);
405 P_MADV_BHV(WILLNEED);
406 P_MADV_BHV(DONTNEED);
408 P_MADV_BHV(DONTFORK);
410 P_MADV_BHV(HWPOISON);
411 #ifdef MADV_SOFT_OFFLINE
412 P_MADV_BHV(SOFT_OFFLINE);
414 P_MADV_BHV(MERGEABLE);
415 P_MADV_BHV(UNMERGEABLE);
417 P_MADV_BHV(HUGEPAGE);
419 #ifdef MADV_NOHUGEPAGE
420 P_MADV_BHV(NOHUGEPAGE);
423 P_MADV_BHV(DONTDUMP);
432 return scnprintf(bf, size, "%#x", behavior);
435 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
437 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
438 struct syscall_arg *arg)
440 int printed = 0, op = arg->val;
443 return scnprintf(bf, size, "NONE");
445 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
446 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
466 #define SCA_FLOCK syscall_arg__scnprintf_flock
468 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
470 enum syscall_futex_args {
471 SCF_UADDR = (1 << 0),
474 SCF_TIMEOUT = (1 << 3),
475 SCF_UADDR2 = (1 << 4),
479 int cmd = op & FUTEX_CMD_MASK;
483 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
484 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
485 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
486 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
487 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
488 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
489 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
490 P_FUTEX_OP(WAKE_OP); break;
491 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
492 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
493 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
494 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
496 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
497 default: printed = scnprintf(bf, size, "%#x", cmd); break;
500 if (op & FUTEX_PRIVATE_FLAG)
501 printed += scnprintf(bf + printed, size - printed, "|PRIV");
503 if (op & FUTEX_CLOCK_REALTIME)
504 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
509 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
511 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
512 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the 'which' argument of getitimer()/setitimer(). */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
517 static const char *whences[] = { "SET", "CUR", "END",
525 static DEFINE_STRARRAY(whences);
527 static const char *fcntl_cmds[] = {
528 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
529 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
530 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
533 static DEFINE_STRARRAY(fcntl_cmds);
535 static const char *rlimit_resources[] = {
536 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
537 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
540 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the 'how' argument of rt_sigprocmask(). */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
545 static const char *clockid[] = {
546 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
547 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
549 static DEFINE_STRARRAY(clockid);
551 static const char *socket_families[] = {
552 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
553 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
554 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
555 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
556 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
557 "ALG", "NFC", "VSOCK",
559 static DEFINE_STRARRAY(socket_families);
561 #ifndef SOCK_TYPE_MASK
562 #define SOCK_TYPE_MASK 0xf
565 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
566 struct syscall_arg *arg)
570 flags = type & ~SOCK_TYPE_MASK;
572 type &= SOCK_TYPE_MASK;
574 * Can't use a strarray, MIPS may override for ABI reasons.
577 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
582 P_SK_TYPE(SEQPACKET);
587 printed = scnprintf(bf, size, "%#x", type);
590 #define P_SK_FLAG(n) \
591 if (flags & SOCK_##n) { \
592 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
593 flags &= ~SOCK_##n; \
601 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
606 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
609 #define MSG_PROBE 0x10
611 #ifndef MSG_WAITFORONE
612 #define MSG_WAITFORONE 0x10000
614 #ifndef MSG_SENDPAGE_NOTLAST
615 #define MSG_SENDPAGE_NOTLAST 0x20000
618 #define MSG_FASTOPEN 0x20000000
621 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
622 struct syscall_arg *arg)
624 int printed = 0, flags = arg->val;
627 return scnprintf(bf, size, "NONE");
628 #define P_MSG_FLAG(n) \
629 if (flags & MSG_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
636 P_MSG_FLAG(DONTROUTE);
641 P_MSG_FLAG(DONTWAIT);
648 P_MSG_FLAG(ERRQUEUE);
649 P_MSG_FLAG(NOSIGNAL);
651 P_MSG_FLAG(WAITFORONE);
652 P_MSG_FLAG(SENDPAGE_NOTLAST);
653 P_MSG_FLAG(FASTOPEN);
654 P_MSG_FLAG(CMSG_CLOEXEC);
658 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
663 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
665 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
666 struct syscall_arg *arg)
671 if (mode == F_OK) /* 0 */
672 return scnprintf(bf, size, "F");
674 if (mode & n##_OK) { \
675 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
685 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
690 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
692 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
693 struct syscall_arg *arg)
695 int printed = 0, flags = arg->val;
697 if (!(flags & O_CREAT))
698 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
701 return scnprintf(bf, size, "RDONLY");
703 if (flags & O_##n) { \
704 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
728 if ((flags & O_SYNC) == O_SYNC)
729 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
741 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
746 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
748 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
749 struct syscall_arg *arg)
751 int printed = 0, flags = arg->val;
754 return scnprintf(bf, size, "NONE");
756 if (flags & EFD_##n) { \
757 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
767 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
772 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
774 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
775 struct syscall_arg *arg)
777 int printed = 0, flags = arg->val;
780 if (flags & O_##n) { \
781 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
790 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
795 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
797 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
802 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
837 return scnprintf(bf, size, "%#x", sig);
840 #define SCA_SIGNUM syscall_arg__scnprintf_signum
842 #define TCGETS 0x5401
/*
 * Names of the tty ioctl commands, indexed by (cmd - 0x5401): the table is
 * registered with an offset of 0x5401, so entry 0 is TCGETS (0x5401).
 *
 * Slots that start a new run after a gap are therefore written as
 * "command number - 0x5401".  Using the bare low byte of the command number
 * as the index would shift every name in that run up by one command.
 * Unnamed slots in the gaps are left NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	/* 0x5426 is unnamed; the next run starts at 0x5427. */
	[0x5427 - 0x5401] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	/* TIOCGEXCL is command number 0x40, not the slot right after 0x39. */
	[0x5440 - 0x5401] = "TIOCGEXCL",
	[0x5450 - 0x5401] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - 0x5401] = "FIOQSIZE",
};
862 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
864 #define STRARRAY(arg, name, array) \
865 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
866 .arg_parm = { [arg] = &strarray__##array, }
868 static struct syscall_fmt {
871 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
877 { .name = "access", .errmsg = true,
878 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
879 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
880 { .name = "brk", .hexret = true,
881 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
882 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
883 { .name = "close", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
885 { .name = "connect", .errmsg = true, },
886 { .name = "dup", .errmsg = true,
887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 { .name = "dup2", .errmsg = true,
889 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
890 { .name = "dup3", .errmsg = true,
891 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
892 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
893 { .name = "eventfd2", .errmsg = true,
894 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
895 { .name = "faccessat", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
897 { .name = "fadvise64", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 { .name = "fallocate", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fchdir", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "fchmod", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchmodat", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 { .name = "fchown", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
909 { .name = "fchownat", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
911 { .name = "fcntl", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */
913 [1] = SCA_STRARRAY, /* cmd */ },
914 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
915 { .name = "fdatasync", .errmsg = true,
916 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
917 { .name = "flock", .errmsg = true,
918 .arg_scnprintf = { [0] = SCA_FD, /* fd */
919 [1] = SCA_FLOCK, /* cmd */ }, },
920 { .name = "fsetxattr", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "fstat", .errmsg = true, .alias = "newfstat",
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
925 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
926 { .name = "fstatfs", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fsync", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
930 { .name = "ftruncate", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "futex", .errmsg = true,
933 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
934 { .name = "futimesat", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
936 { .name = "getdents", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 { .name = "getdents64", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
941 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
942 { .name = "ioctl", .errmsg = true,
943 .arg_scnprintf = { [0] = SCA_FD, /* fd */
944 [1] = SCA_STRHEXARRAY, /* cmd */
945 [2] = SCA_HEX, /* arg */ },
946 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
947 { .name = "kill", .errmsg = true,
948 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
949 { .name = "linkat", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
951 { .name = "lseek", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */
953 [2] = SCA_STRARRAY, /* whence */ },
954 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
955 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
956 { .name = "madvise", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_HEX, /* start */
958 [2] = SCA_MADV_BHV, /* behavior */ }, },
959 { .name = "mkdirat", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
961 { .name = "mknodat", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
963 { .name = "mlock", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
/* mlockall() takes a single flags word (no address); it is shown in hex. */
965 { .name = "mlockall", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_HEX, /* flags */ }, },
967 { .name = "mmap", .hexret = true,
968 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
969 [2] = SCA_MMAP_PROT, /* prot */
970 [3] = SCA_MMAP_FLAGS, /* flags */
971 [4] = SCA_FD, /* fd */ }, },
972 { .name = "mprotect", .errmsg = true,
973 .arg_scnprintf = { [0] = SCA_HEX, /* start */
974 [2] = SCA_MMAP_PROT, /* prot */ }, },
975 { .name = "mremap", .hexret = true,
976 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
977 [4] = SCA_HEX, /* new_addr */ }, },
978 { .name = "munlock", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
980 { .name = "munmap", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982 { .name = "name_to_handle_at", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
984 { .name = "newfstatat", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
986 { .name = "open", .errmsg = true,
987 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
988 { .name = "open_by_handle_at", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991 { .name = "openat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
993 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
994 { .name = "pipe2", .errmsg = true,
995 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
996 { .name = "poll", .errmsg = true, .timeout = true, },
997 { .name = "ppoll", .errmsg = true, .timeout = true, },
998 { .name = "pread", .errmsg = true, .alias = "pread64",
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "preadv", .errmsg = true, .alias = "pread",
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1003 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "pwritev", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1007 { .name = "read", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "readlinkat", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1011 { .name = "readv", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "recvfrom", .errmsg = true,
1014 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1015 { .name = "recvmmsg", .errmsg = true,
1016 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017 { .name = "recvmsg", .errmsg = true,
1018 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1019 { .name = "renameat", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1021 { .name = "rt_sigaction", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1023 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1024 { .name = "rt_sigqueueinfo", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1026 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1027 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1028 { .name = "select", .errmsg = true, .timeout = true, },
1029 { .name = "sendmmsg", .errmsg = true,
1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 { .name = "sendmsg", .errmsg = true,
1032 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1033 { .name = "sendto", .errmsg = true,
1034 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1036 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037 { .name = "shutdown", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "socket", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 [1] = SCA_SK_TYPE, /* type */ },
1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1043 { .name = "socketpair", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045 [1] = SCA_SK_TYPE, /* type */ },
1046 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1047 { .name = "stat", .errmsg = true, .alias = "newstat", },
1048 { .name = "symlinkat", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1050 { .name = "tgkill", .errmsg = true,
1051 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1052 { .name = "tkill", .errmsg = true,
1053 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1054 { .name = "uname", .errmsg = true, .alias = "newuname", },
1055 { .name = "unlinkat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057 { .name = "utimensat", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1059 { .name = "write", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "writev", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1067 const struct syscall_fmt *fmt = fmtp;
1068 return strcmp(name, fmt->name);
1071 static struct syscall_fmt *syscall_fmt__find(const char *name)
1073 const int nmemb = ARRAY_SIZE(syscall_fmts);
1074 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1078 struct event_format *tp_format;
1081 struct syscall_fmt *fmt;
1082 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1086 static size_t fprintf_duration(unsigned long t, FILE *fp)
1088 double duration = (double)t / NSEC_PER_MSEC;
1089 size_t printed = fprintf(fp, "(");
1091 if (duration >= 1.0)
1092 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1093 else if (duration >= 0.01)
1094 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1096 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1097 return printed + fprintf(fp, "): ");
1100 struct thread_trace {
1104 unsigned long nr_events;
1112 struct intlist *syscall_stats;
1115 static struct thread_trace *thread_trace__new(void)
1117 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1120 ttrace->paths.max = -1;
1122 ttrace->syscall_stats = intlist__new(NULL);
1127 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1129 struct thread_trace *ttrace;
1134 if (thread->priv == NULL)
1135 thread->priv = thread_trace__new();
1137 if (thread->priv == NULL)
1140 ttrace = thread->priv;
1141 ++ttrace->nr_events;
1145 color_fprintf(fp, PERF_COLOR_RED,
1146 "WARNING: not enough memory, dropping samples!\n");
1151 struct perf_tool tool;
1158 struct syscall *table;
1160 struct record_opts opts;
1161 struct machine *host;
1165 unsigned long nr_events;
1166 struct strlist *ev_qualifier;
1167 bool not_ev_qualifier;
1169 const char *last_vfs_getname;
1170 struct intlist *tid_list;
1171 struct intlist *pid_list;
1173 bool multiple_threads;
1177 bool show_tool_stats;
1178 double duration_filter;
1181 u64 vfs_getname, proc_getname;
1185 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1187 struct thread_trace *ttrace = thread->priv;
1189 if (fd > ttrace->paths.max) {
1190 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1195 if (ttrace->paths.max != -1) {
1196 memset(npath + ttrace->paths.max + 1, 0,
1197 (fd - ttrace->paths.max) * sizeof(char *));
1199 memset(npath, 0, (fd + 1) * sizeof(char *));
1202 ttrace->paths.table = npath;
1203 ttrace->paths.max = fd;
1206 ttrace->paths.table[fd] = strdup(pathname);
1208 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1211 static int thread__read_fd_path(struct thread *thread, int fd)
1213 char linkname[PATH_MAX], pathname[PATH_MAX];
1217 if (thread->pid_ == thread->tid) {
1218 scnprintf(linkname, sizeof(linkname),
1219 "/proc/%d/fd/%d", thread->pid_, fd);
1221 scnprintf(linkname, sizeof(linkname),
1222 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1225 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1228 ret = readlink(linkname, pathname, sizeof(pathname));
1230 if (ret < 0 || ret > st.st_size)
1233 pathname[ret] = '\0';
1234 return trace__set_fd_pathname(thread, fd, pathname);
1237 static const char *thread__fd_path(struct thread *thread, int fd,
1238 struct trace *trace)
1240 struct thread_trace *ttrace = thread->priv;
1248 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1251 ++trace->stats.proc_getname;
1252 if (thread__read_fd_path(thread, fd)) {
1256 return ttrace->paths.table[fd];
1259 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1260 struct syscall_arg *arg)
1263 size_t printed = scnprintf(bf, size, "%d", fd);
1264 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1267 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1272 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1273 struct syscall_arg *arg)
1276 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1277 struct thread_trace *ttrace = arg->thread->priv;
1279 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1280 zfree(&ttrace->paths.table[fd]);
1285 static bool trace__filter_duration(struct trace *trace, double t)
1287 return t < (trace->duration_filter * NSEC_PER_MSEC);
1290 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1292 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1294 return fprintf(fp, "%10.3f ", ts);
1297 static bool done = false;
1298 static bool interrupted = false;
1300 static void sig_handler(int sig)
1303 interrupted = sig == SIGINT;
1306 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1307 u64 duration, u64 tstamp, FILE *fp)
1309 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1310 printed += fprintf_duration(duration, fp);
1312 if (trace->multiple_threads) {
1313 if (trace->show_comm)
1314 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1315 printed += fprintf(fp, "%d ", thread->tid);
1321 static int trace__process_event(struct trace *trace, struct machine *machine,
1322 union perf_event *event, struct perf_sample *sample)
1326 switch (event->header.type) {
1327 case PERF_RECORD_LOST:
1328 color_fprintf(trace->output, PERF_COLOR_RED,
1329 "LOST %" PRIu64 " events!\n", event->lost.lost);
1330 ret = machine__process_lost_event(machine, event, sample);
1332 ret = machine__process_event(machine, event, sample);
1339 static int trace__tool_process(struct perf_tool *tool,
1340 union perf_event *event,
1341 struct perf_sample *sample,
1342 struct machine *machine)
1344 struct trace *trace = container_of(tool, struct trace, tool);
1345 return trace__process_event(trace, machine, event, sample);
/*
 * Initialise the symbol library, create the host machine and synthesize
 * thread events for the threads in evlist, delivering them through
 * trace__tool_process().
 * NOTE(review): the error checks/returns after each step are elided in this
 * listing.
 */
1348 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1350 int err = symbol__init();
/* trace->host is the machine the sample handlers use for thread lookups. */
1355 trace->host = machine__new_host();
1356 if (trace->host == NULL)
1359 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1360 evlist->threads, trace__tool_process, false);
/*
 * Build sc->arg_scnprintf, the per-argument formatter table of a syscall.
 * A formatter supplied by sc->fmt takes precedence; otherwise arguments whose
 * tracepoint field is flagged FIELD_IS_POINTER are printed with
 * syscall_arg__scnprintf_hex.
 * NOTE(review): 'idx' is declared and advanced on lines elided from this
 * listing, as are the return statements.
 */
1367 static int syscall__set_arg_fmts(struct syscall *sc)
1369 struct format_field *field;
/* nr_fields - 1 slots: the loop below starts at fields->next, skipping the first field. */
1372 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1373 if (sc->arg_scnprintf == NULL)
1377 sc->arg_parm = sc->fmt->arg_parm;
1379 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1380 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1381 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1382 else if (field->flags & FIELD_IS_POINTER)
1383 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate trace->syscalls.table[id]: resolve the syscall name with
 * audit_syscall_to_name(), grow the table when id is beyond syscalls.max,
 * apply the event qualifier filter, and look up the
 * "syscalls:sys_enter_<name>" tracepoint format (retrying with the fmt alias)
 * before setting up the argument formatters.
 * NOTE(review): several declarations, returns and braces are elided in this
 * listing.
 */
1390 static int trace__read_syscall_info(struct trace *trace, int id)
1394 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Grow the table so that index id is valid, zeroing only the entries that are new. */
1399 if (id > trace->syscalls.max) {
1400 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1402 if (nsyscalls == NULL)
1405 if (trace->syscalls.max != -1) {
1406 memset(nsyscalls + trace->syscalls.max + 1, 0,
1407 (id - trace->syscalls.max) * sizeof(*sc));
1409 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1412 trace->syscalls.table = nsyscalls;
1413 trace->syscalls.max = id;
1416 sc = trace->syscalls.table + id;
/* Mark the syscall filtered when list membership and the negation flag agree. */
1419 if (trace->ev_qualifier) {
1420 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1422 if (!(in ^ trace->not_ev_qualifier)) {
1423 sc->filtered = true;
1425 * No need to do read tracepoint information since this will be
1432 sc->fmt = syscall_fmt__find(sc->name);
1434 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1435 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Retry with the alias name when the primary tracepoint was not found. */
1437 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1438 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1439 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1442 if (sc->tp_format == NULL)
1445 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of a syscall into bf.  When the tracepoint format is
 * known, each field after the first is printed as "name: value", using the
 * per-argument formatter from sc->arg_scnprintf when there is one and "%ld"
 * otherwise.  The final scnprintf() below prints args[i] together with its
 * index i.
 * NOTE(review): locals, the struct syscall_arg initialiser, the 'continue'
 * of the zero-suppression test and the header of the last loop are elided in
 * this listing.
 */
1448 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1449 unsigned long *args, struct trace *trace,
1450 struct thread *thread)
1454 if (sc->tp_format != NULL) {
1455 struct format_field *field;
1457 struct syscall_arg arg = {
1464 for (field = sc->tp_format->format.fields->next; field;
1465 field = field->next, ++arg.idx, bit <<= 1) {
1469 * Suppress this argument if its value is zero and
1470 * and we don't have a string associated in an
1473 if (args[arg.idx] == 0 &&
1474 !(sc->arg_scnprintf &&
1475 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1476 sc->arg_parm[arg.idx]))
/* Emit ", " between arguments once something has already been printed. */
1479 printed += scnprintf(bf + printed, size - printed,
1480 "%s%s: ", printed ? ", " : "", field->name);
1481 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1482 arg.val = args[arg.idx];
1484 arg.parm = sc->arg_parm[arg.idx];
1485 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1486 size - printed, &arg);
1488 printed += scnprintf(bf + printed, size - printed,
1489 "%ld", args[arg.idx]);
1496 printed += scnprintf(bf + printed, size - printed,
1498 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint sample handlers; the handlers are stored in
 * evsel->handler and invoked with the owning trace, the evsel and the sample.
 */
1506 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1507 struct perf_sample *sample);
/*
 * Return the syscall table entry for id, reading its description on first
 * use through trace__read_syscall_info().  Messages are written to
 * trace->output when the id is invalid or its information cannot be read.
 * NOTE(review): the guards, labels and return statements around the messages
 * are elided in this listing.
 */
1509 static struct syscall *trace__syscall_info(struct trace *trace,
1510 struct perf_evsel *evsel, int id)
1516 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1517 * before that, leaving at a higher verbosity level till that is
1518 * explained. Reproduced with plain ftrace with:
1520 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1521 * grep "NR -1 " /t/trace_pipe
1523 * After generating some load on the machine.
1527 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1528 id, perf_evsel__name(evsel), ++n);
/* Load the entry lazily: only when the id is out of range or has no name yet. */
1533 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1534 trace__read_syscall_info(trace, id))
1537 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1540 return &trace->syscalls.table[id];
/* Failure report; the syscall name is appended when one was resolved. */
1544 fprintf(trace->output, "Problems reading syscall %d", id);
1545 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1546 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1547 fputs(" information\n", trace->output);
/*
 * Account one syscall in the thread's per-syscall statistics: find or create
 * the intlist node for id, allocate its struct stats on first use, and feed
 * update_stats() with the time elapsed since ttrace->entry_time (computed
 * only when entry_time is set and earlier than the sample time).
 * NOTE(review): allocation-failure handling, initialisation of the new stats
 * object and the declaration of 'duration' are elided in this listing.
 */
1552 static void thread__update_stats(struct thread_trace *ttrace,
1553 int id, struct perf_sample *sample)
1555 struct int_node *inode;
1556 struct stats *stats;
1559 inode = intlist__findnew(ttrace->syscall_stats, id);
/* The node's priv pointer carries the stats object for this syscall id. */
1563 stats = inode->priv;
1564 if (stats == NULL) {
1565 stats = malloc(sizeof(struct stats));
1569 inode->priv = stats;
1572 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1573 duration = sample->time - ttrace->entry_time;
1575 update_stats(stats, duration);
/*
 * sys_enter handler: formats "name(" plus the arguments of the syscall into
 * the thread's entry_str buffer (1024 bytes, allocated on first use) and
 * records the entry time.  For exit_group/exit the line is printed right away
 * when neither a duration filter nor summary-only mode is active; the
 * entry_pending flag set here is what trace__sys_exit() later checks.
 * NOTE(review): NULL checks, some locals, the else branch structure and the
 * return statements are elided in this listing.
 */
1578 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1579 struct perf_sample *sample)
1584 struct thread *thread;
1585 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1586 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1587 struct thread_trace *ttrace;
1595 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1596 ttrace = thread__trace(thread, trace->output);
1600 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* NOTE(review): ttrace was already obtained from thread__trace() above. */
1601 ttrace = thread->priv;
1603 if (ttrace->entry_str == NULL) {
1604 ttrace->entry_str = malloc(1024);
1605 if (!ttrace->entry_str)
1609 ttrace->entry_time = sample->time;
1610 msg = ttrace->entry_str;
/* The literal 1024 below must match the size passed to malloc() above. */
1611 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1613 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1614 args, trace, thread);
1616 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1617 if (!trace->duration_filter && !trace->summary_only) {
1618 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1619 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1622 ttrace->entry_pending = true;
/*
 * sys_exit handler: updates the per-thread syscall statistics, associates the
 * last vfs_getname pathname with the returned fd for the "open" syscall,
 * applies the duration filter and prints the completed line: either the
 * pending entry text or a " ... [continued]: name()" marker, followed by
 * ") = <result>" chosen according to sc->fmt (errno name and message,
 * "Timeout", hex, or "%d" when there is no fmt).
 * NOTE(review): NULL checks, several locals, goto targets, the guard around
 * the statistics update and the final else branch are elided in this listing.
 */
1627 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1628 struct perf_sample *sample)
1632 struct thread *thread;
1633 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1634 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1635 struct thread_trace *ttrace;
1643 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1644 ttrace = thread__trace(thread, trace->output);
1649 thread__update_stats(ttrace, id, sample);
1651 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A non-negative return from open is the new fd: bind it to the captured pathname. */
1653 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1654 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1655 trace->last_vfs_getname = NULL;
1656 ++trace->stats.vfs_getname;
1659 ttrace = thread->priv;
1661 ttrace->exit_time = sample->time;
/* The duration can only be computed when an entry time was recorded. */
1663 if (ttrace->entry_time) {
1664 duration = sample->time - ttrace->entry_time;
1665 if (trace__filter_duration(trace, duration))
1667 } else if (trace->duration_filter)
1670 if (trace->summary_only)
1673 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1675 if (ttrace->entry_pending) {
1676 fprintf(trace->output, "%-70s", ttrace->entry_str);
1678 fprintf(trace->output, " ... [");
1679 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1680 fprintf(trace->output, "]: %s()", sc->name);
1683 if (sc->fmt == NULL) {
1685 fprintf(trace->output, ") = %d", ret);
1686 } else if (ret < 0 && sc->fmt->errmsg) {
1688 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1689 *e = audit_errno_to_name(-ret);
1691 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1692 } else if (ret == 0 && sc->fmt->timeout)
1693 fprintf(trace->output, ") = 0 Timeout");
1694 else if (sc->fmt->hexret)
1695 fprintf(trace->output, ") = %#x", ret);
1699 fputc('\n', trace->output);
/* The entry text has been consumed (or superseded) by this exit. */
1701 ttrace->entry_pending = false;
1706 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1707 struct perf_sample *sample)
1709 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * sched_stat_runtime handler: converts the "runtime" field of the sample to
 * milliseconds (dividing by NSEC_PER_MSEC) and adds it to both the per-thread
 * and the global runtime_ms counters.  The fprintf() below dumps the raw
 * event fields to trace->output.
 * NOTE(review): the remaining arguments of machine__findnew_thread(), the
 * NULL check on ttrace and the path leading to the dump are elided in this
 * listing.
 */
1713 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1714 struct perf_sample *sample)
1716 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1717 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1718 struct thread *thread = machine__findnew_thread(trace->host,
1721 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1726 ttrace->runtime_ms += runtime_ms;
1727 trace->runtime_ms += runtime_ms;
1731 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1733 perf_evsel__strval(evsel, sample, "comm"),
1734 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1736 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored based on the pid/tid lists.
 * The first test matches samples whose pid or tid is present in a configured
 * list; the second detects that at least one list is configured.
 * NOTE(review): the return statements are elided in this listing --
 * presumably a match means "do not skip" and a configured list without a
 * match means "skip"; confirm against the full source.
 */
1740 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1742 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1743 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1746 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback: recovers the struct trace embedding 'tool',
 * drops samples rejected by skip_sample(), latches the first timestamp as
 * base_time (unless full_time is set) and dispatches the sample to the
 * handler stored in evsel->handler.
 * NOTE(review): the guard around the handler call and the return statements
 * are elided in this listing.
 */
1752 static int trace__process_sample(struct perf_tool *tool,
1753 union perf_event *event __maybe_unused,
1754 struct perf_sample *sample,
1755 struct perf_evsel *evsel,
1756 struct machine *machine __maybe_unused)
1758 struct trace *trace = container_of(tool, struct trace, tool);
1761 tracepoint_handler handler = evsel->handler;
1763 if (skip_sample(trace, sample))
/* The first sample seen defines time zero for relative timestamps. */
1766 if (!trace->full_time && trace->base_time == 0)
1767 trace->base_time = sample->time;
1771 handler(trace, evsel, sample);
/*
 * Turn the pid and tid target strings (trace->opts.target.pid / .tid) into
 * integer lists stored in trace->pid_list and trace->tid_list, logging an
 * error with pr_err() when intlist__new() fails for either.
 * NOTE(review): the return statements are elided in this listing.
 */
1777 static int parse_target_str(struct trace *trace)
1779 if (trace->opts.target.pid) {
1780 trace->pid_list = intlist__new(trace->opts.target.pid);
1781 if (trace->pid_list == NULL) {
1782 pr_err("Error parsing process id string\n");
1787 if (trace->opts.target.tid) {
1788 trace->tid_list = intlist__new(trace->opts.target.tid);
1789 if (trace->tid_list == NULL) {
1790 pr_err("Error parsing thread id string\n");
/*
 * "perf trace record": build an argument vector for cmd_record() made of the
 * fixed record_args, one event string selecting the syscall enter/exit
 * tracepoints, and the caller's own arguments, then invoke cmd_record().
 * NOTE(review): the contents of record_args and the error returns are elided
 * in this listing; no free() of rec_argv is visible -- confirm that the error
 * paths do not leak it.
 */
1798 static int trace__record(int argc, const char **argv)
1800 unsigned int rec_argc, i, j;
1801 const char **rec_argv;
1802 const char * const record_args[] = {
1810 /* +1 is for the event string below */
1811 rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
/* One extra, zeroed slot is allocated beyond rec_argc. */
1812 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1814 if (rec_argv == NULL)
1817 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1818 rec_argv[i] = record_args[i];
1820 /* event string may be different for older kernels - e.g., RHEL6 */
1821 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1822 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1823 else if (is_valid_tracepoint("syscalls:sys_enter"))
1824 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
1826 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
/* Append the user's arguments after the fixed part. */
1831 for (j = 0; j < (unsigned int)argc; j++, i++)
1832 rec_argv[i] = argv[j];
1834 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: the summary printer is defined further down but used by trace__run(). */
1837 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the "probe:vfs_getname" tracepoint to evlist, with
 * trace__vfs_getname() as its handler.  An evsel that lacks a "pathname"
 * field is deleted instead of being added.
 * NOTE(review): the NULL check on the new evsel and the early returns are
 * elided in this listing.
 */
1839 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1841 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
/* trace__vfs_getname() reads the "pathname" field, so the probe is useless without it. */
1845 if (perf_evsel__field(evsel, "pathname") == NULL) {
1846 perf_evsel__delete(evsel);
1850 evsel->handler = trace__vfs_getname;
1851 perf_evlist__add(evlist, evsel);
/*
 * Live tracing mode: build an evlist with the syscall enter/exit tracepoints
 * (plus vfs_getname and sched_stat_runtime), create the target maps,
 * initialise symbols, prepare a workload from argv, open, mmap and enable the
 * events, and then drain the ring buffers, dispatching each sample to the
 * handler stored in its evsel.  On the way out it prints the thread summary
 * and, with show_tool_stats, the tool statistics, and tears the evlist down.
 * NOTE(review): many guards, labels, gotos and conditions are elided in this
 * listing; the comments below describe only the visible statements.
 */
1854 static int trace__run(struct trace *trace, int argc, const char **argv)
1856 struct perf_evlist *evlist = perf_evlist__new();
1857 struct perf_evsel *evsel;
1859 unsigned long before;
1860 const bool forks = argc > 0;
1864 if (evlist == NULL) {
1865 fprintf(trace->output, "Not enough memory to run!\n");
1869 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1872 perf_evlist__add_vfs_getname(evlist);
1875 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1876 trace__sched_stat_runtime))
1879 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1881 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1882 goto out_delete_evlist;
1885 err = trace__symbols_init(trace, evlist);
1887 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1888 goto out_delete_maps;
1891 perf_evlist__config(evlist, &trace->opts);
/* Both signals are routed to sig_handler(); the loop below observes 'done'. */
1893 signal(SIGCHLD, sig_handler);
1894 signal(SIGINT, sig_handler);
1897 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1898 argv, false, false);
1900 fprintf(trace->output, "Couldn't run the workload!\n");
1901 goto out_delete_maps;
1905 err = perf_evlist__open(evlist);
1907 goto out_error_open;
1909 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1911 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1912 goto out_close_evlist;
1915 perf_evlist__enable(evlist);
1918 perf_evlist__start_workload(evlist);
/* With more than one thread (or a first map entry of -1) each line must carry the tid. */
1920 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot the event count to detect below whether this pass read anything. */
1922 before = trace->nr_events;
1924 for (i = 0; i < evlist->nr_mmaps; i++) {
1925 union perf_event *event;
1927 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1928 const u32 type = event->header.type;
1929 tracepoint_handler handler;
1930 struct perf_sample sample;
1934 err = perf_evlist__parse_sample(evlist, event, &sample);
1936 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
/* The first timestamp seen becomes the base for relative times unless full_time is set. */
1940 if (!trace->full_time && trace->base_time == 0)
1941 trace->base_time = sample.time;
/* Non-sample records are handed to trace__process_event(). */
1943 if (type != PERF_RECORD_SAMPLE) {
1944 trace__process_event(trace, trace->host, event, &sample);
1948 evsel = perf_evlist__id2evsel(evlist, sample.id);
1949 if (evsel == NULL) {
1950 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1954 if (sample.raw_data == NULL) {
1955 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1956 perf_evsel__name(evsel), sample.tid,
1957 sample.cpu, sample.raw_size);
1961 handler = evsel->handler;
1962 handler(trace, evsel, &sample);
1964 perf_evlist__mmap_consume(evlist, i);
/* Nothing new was read: poll the fds, with a 100 ms timeout once 'done' is set and no timeout otherwise. */
1971 if (trace->nr_events == before) {
1972 int timeout = done ? 100 : -1;
1974 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1981 perf_evlist__disable(evlist);
1985 trace__fprintf_thread_summary(trace, trace->output);
1987 if (trace->show_tool_stats) {
1988 fprintf(trace->output, "Stats:\n "
1989 " vfs_getname : %" PRIu64 "\n"
1990 " proc_getname: %" PRIu64 "\n",
1991 trace->stats.vfs_getname,
1992 trace->stats.proc_getname);
/* Teardown mirrors the setup order in reverse: munmap, close, delete maps, delete evlist. */
1996 perf_evlist__munmap(evlist);
1998 perf_evlist__close(evlist);
2000 perf_evlist__delete_maps(evlist);
2002 perf_evlist__delete(evlist);
2004 trace->live = false;
/* Error reporting for tracepoint setup / open failures, then back to the evlist cleanup. */
2007 char errbuf[BUFSIZ];
2010 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2014 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2017 fprintf(trace->output, "%s\n", errbuf);
2018 goto out_delete_evlist;
/*
 * Replay mode: open a perf data file as a session, install the tool
 * callbacks, locate the sys_enter and sys_exit tracepoints (raw_syscalls
 * first, then the older syscalls names), initialise their field accessors,
 * parse the pid/tid target strings and process all events.  The thread
 * summary is printed afterwards when trace->summary is set and processing did
 * not fail.
 * NOTE(review): the file path initialiser, several NULL checks, goto targets
 * and return statements are elided in this listing.
 */
2022 static int trace__replay(struct trace *trace)
2024 const struct perf_evsel_str_handler handlers[] = {
2025 { "probe:vfs_getname", trace__vfs_getname, },
2027 struct perf_data_file file = {
2029 .mode = PERF_DATA_MODE_READ,
2031 struct perf_session *session;
2032 struct perf_evsel *evsel;
/* Samples go to trace__process_sample(); the other records use the stock perf_event handlers. */
2035 trace->tool.sample = trace__process_sample;
2036 trace->tool.mmap = perf_event__process_mmap;
2037 trace->tool.mmap2 = perf_event__process_mmap2;
2038 trace->tool.comm = perf_event__process_comm;
2039 trace->tool.exit = perf_event__process_exit;
2040 trace->tool.fork = perf_event__process_fork;
2041 trace->tool.attr = perf_event__process_attr;
2042 trace->tool.tracing_data = perf_event__process_tracing_data;
2043 trace->tool.build_id = perf_event__process_build_id;
2045 trace->tool.ordered_samples = true;
2046 trace->tool.ordering_requires_timestamps = true;
2048 /* add tid to output */
2049 trace->multiple_threads = true;
2051 if (symbol__init() < 0)
2054 session = perf_session__new(&file, false, &trace->tool);
2055 if (session == NULL)
/* In replay mode the host machine is the one owned by the session. */
2058 trace->host = &session->machines.host;
2060 err = perf_session__set_tracepoints_handlers(session, handlers);
2064 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065 "raw_syscalls:sys_enter");
2066 /* older kernels have syscalls tp versus raw_syscalls */
2068 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2069 "syscalls:sys_enter");
2070 if (evsel == NULL) {
2071 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2075 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2076 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2077 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2081 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2082 "raw_syscalls:sys_exit");
2084 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2085 "syscalls:sys_exit");
2086 if (evsel == NULL) {
2087 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2091 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2092 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2093 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2097 err = parse_target_str(trace);
2103 err = perf_session__process_events(session, &trace->tool);
2105 pr_err("Failed to process events, error %d", err);
2107 else if (trace->summary)
2108 trace__fprintf_thread_summary(trace, trace->output);
2111 perf_session__delete(session);
/*
 * Write the heading of the per-thread summary section to fp.
 *
 * Returns the value returned by fprintf() (the number of characters written
 * on success).
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * Print the per-syscall statistics table of one thread to fp: a header, then
 * for each node of ttrace->syscall_stats the syscall name, call count and the
 * min/avg/max times in msec plus the relative standard deviation in percent.
 * Returns the accumulated fprintf() results.
 * NOTE(review): the loop construct, the NULL checks on 'inode' and 'stats',
 * some locals and the arguments of one fprintf() are elided in this listing.
 */
2125 static size_t thread__dump_stats(struct thread_trace *ttrace,
2126 struct trace *trace, FILE *fp)
2128 struct stats *stats;
2131 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2136 printed += fprintf(fp, "\n");
2138 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2139 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2140 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2142 /* each int_node is a syscall */
2144 stats = inode->priv;
/* min/max are scaled by NSEC_PER_MSEC here; avg is scaled after pct is derived from it. */
2146 double min = (double)(stats->min) / NSEC_PER_MSEC;
2147 double max = (double)(stats->max) / NSEC_PER_MSEC;
2148 double avg = avg_stats(stats);
2150 u64 n = (u64) stats->n;
/* Guard against dividing by a zero average. */
2152 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2153 avg /= NSEC_PER_MSEC;
/* The node key (inode->i) indexes the syscall table. */
2155 sc = &trace->syscalls.table[inode->i];
2156 printed += fprintf(fp, " %-15s", sc->name);
2157 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2159 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2162 inode = intlist__next(inode);
2165 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument of machine__for_each_thread().
 * NOTE(review): only the 'trace' member is visible in this listing; the
 * users of this struct also access 'fp' and 'printed'.
 */
2170 /* struct used to pass data to per-thread function */
2171 struct summary_data {
2173 struct trace *trace;
/*
 * Per-thread callback of the summary: prints the thread's comm and tid, its
 * event count, its share of trace->nr_events as a percentage, its runtime in
 * msec, and then the per-syscall table from thread__dump_stats().  The
 * character count is added to data->printed.
 * NOTE(review): the NULL check on ttrace, the declaration of 'ratio' and the
 * return statement are elided in this listing.
 */
2177 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2179 struct summary_data *data = priv;
2180 FILE *fp = data->fp;
2181 size_t printed = data->printed;
2182 struct trace *trace = data->trace;
2183 struct thread_trace *ttrace = thread->priv;
2189 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2191 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2192 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2193 printed += fprintf(fp, "%.1f%%", ratio);
2194 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2195 printed += thread__dump_stats(ttrace, trace, fp);
/* NOTE(review): 'printed' started from data->printed, so this adds the previous total again -- confirm intended. */
2197 data->printed += printed;
/*
 * Print the summary for every thread known to trace->host: the header first,
 * then trace__fprintf_one_thread() for each thread.  Returns data.printed.
 * NOTE(review): the initialiser of 'data' is elided in this listing.
 */
2202 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2204 struct summary_data data = {
2208 data.printed = trace__fprintf_threads_header(fp);
2210 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2212 return data.printed;
2215 static int trace__set_duration(const struct option *opt, const char *str,
2216 int unset __maybe_unused)
2218 struct trace *trace = opt->value;
2220 trace->duration_filter = atof(str);
/*
 * Open filename for writing as trace->output.  When a non-empty file of that
 * name already exists it is first renamed to "<filename>.old".
 * Returns 0 on success or -errno when fopen() fails.
 * NOTE(review): the declaration of 'st' and a statement between the
 * scnprintf() and the rename() are elided in this listing; the return value
 * of rename() is not checked in the visible code.
 */
2224 static int trace__open_output(struct trace *trace, const char *filename)
2228 if (!stat(filename, &st) && st.st_size) {
2229 char oldname[PATH_MAX];
2231 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2233 rename(filename, oldname);
2236 trace->output = fopen(filename, "w");
2238 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of "perf trace": dispatches the "record" sub-command, parses
 * the options, opens the optional output file, builds the event qualifier
 * list from -e (a leading '!' negates it), validates the target and its uid,
 * defaults to system-wide tracing when neither a command nor a target was
 * given, and finally either replays recorded data or runs a live trace.
 * NOTE(review): parts of the struct trace initialiser, some option entries,
 * error checks, goto targets and the condition choosing between replay and
 * live mode are elided in this listing.
 */
2241 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2243 const char * const trace_usage[] = {
2244 "perf trace [<options>] [<command>]",
2245 "perf trace [<options>] -- <command> [<options>]",
2246 "perf trace record [<options>] [<command>]",
2247 "perf trace record [<options>] -- <command> [<options>]",
2250 struct trace trace = {
2252 .machine = audit_detect_machine(),
2253 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2263 .user_freq = UINT_MAX,
2264 .user_interval = ULLONG_MAX,
2271 const char *output_name = NULL;
2272 const char *ev_qualifier_str = NULL;
2273 const struct option trace_options[] = {
2274 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2275 "show the thread COMM next to its id"),
2276 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2277 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2278 "list of events to trace"),
2279 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2280 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2281 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2282 "trace events on existing process id"),
2283 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2284 "trace events on existing thread id"),
2285 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2286 "system-wide collection from all CPUs"),
2287 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2288 "list of cpus to monitor"),
2289 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2290 "child tasks do not inherit counters"),
2291 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2292 "number of mmap data pages",
2293 perf_evlist__parse_mmap_pages),
2294 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2296 OPT_CALLBACK(0, "duration", &trace, "float",
2297 "show only events with duration > N.M ms",
2298 trace__set_duration),
2299 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2300 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2301 OPT_BOOLEAN('T', "time", &trace.full_time,
2302 "Show full timestamp, not time relative to first start"),
2303 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2304 "Show only syscall summary with statistics"),
2305 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2306 "Show all syscalls and summary with statistics"),
/* "record" is handled before option parsing; the remaining arguments go to trace__record(). */
2312 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2313 return trace__record(argc-2, &argv[2]);
2315 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2317 /* summary_only implies summary option, but don't overwrite summary if set */
2318 if (trace.summary_only)
2319 trace.summary = trace.summary_only;
2321 if (output_name != NULL) {
2322 err = trace__open_output(&trace, output_name);
2324 perror("failed to create output file");
2329 if (ev_qualifier_str != NULL) {
2330 const char *s = ev_qualifier_str;
/* A leading '!' turns the qualifier into an exclusion list. */
2332 trace.not_ev_qualifier = *s == '!';
2333 if (trace.not_ev_qualifier)
2335 trace.ev_qualifier = strlist__new(true, s);
2336 if (trace.ev_qualifier == NULL) {
2337 fputs("Not enough memory to parse event qualifier",
2344 err = target__validate(&trace.opts.target);
2346 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2347 fprintf(trace.output, "%s", bf);
2351 err = target__parse_uid(&trace.opts.target);
2353 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2354 fprintf(trace.output, "%s", bf);
/* No workload and no explicit target: trace the whole system. */
2358 if (!argc && target__none(&trace.opts.target))
2359 trace.opts.target.system_wide = true;
2362 err = trace__replay(&trace);
2364 err = trace__run(&trace, argc, argv);
/* Only close the stream when this function opened it for -o. */
2367 if (output_name != NULL)
2368 fclose(trace.output);