1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros whose headers lack these mmap/madvise constants: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
40 struct thread *thread;
/*
 * DEFINE_STRARRAY(array) declares a struct strarray named strarray__<array>
 * whose nr_entries is the element count of array.
 * DEFINE_STRARRAY_OFFSET(array, off) does the same for tables whose first
 * entry does not stand for value 0; off is presumably what ends up in the
 * offset member that __syscall_arg__scnprintf_strarray() subtracts from the
 * argument value (TODO confirm).
 */
53 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
54 .nr_entries = ARRAY_SIZE(array), \
58 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
60 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print arg->val symbolically using the struct strarray passed in arg->parm.
 * The value is rebased by sa->offset to obtain the table index; when that
 * index falls outside the table the raw value is printed with intfmt.
 * Returns the scnprintf() result.
 *
 * NOTE(review): entries[idx] is not checked for NULL before being handed to
 * %s, yet tables built with designated initializers (tioctls) contain holes.
 * Confirm and guard if needed.
 */
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
66 struct syscall_arg *arg)
68 struct strarray *sa = arg->parm;
69 int idx = arg->val - sa->offset;
71 if (idx < 0 || idx >= sa->nr_entries)
72 return scnprintf(bf, size, intfmt, arg->val);
74 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* Strarray formatter that prints values missing from the table in decimal (%d). */
77 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
78 struct syscall_arg *arg)
80 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
83 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/* Strarray formatter that prints values missing from the table in hexadecimal (%#x). */
85 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
86 struct syscall_arg *arg)
88 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
91 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
/*
 * Forward declaration: the fd formatter is defined further down, after the
 * thread__fd_path() helper it calls.
 */
93 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
94 struct syscall_arg *arg);
96 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Formatter used for the directory fd argument of the *at() syscalls.
 * One case prints the literal CWD (presumably the AT_FDCWD value, TODO
 * confirm); every other value is handed to the plain fd formatter.
 */
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99 struct syscall_arg *arg)
104 return scnprintf(bf, size, "CWD");
106 return syscall_arg__scnprintf_fd(bf, size, arg);
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration: the close() fd formatter is defined further down,
 * next to the fd path cache it invalidates.
 */
111 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
112 struct syscall_arg *arg);
114 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Print arg->val in hexadecimal with a 0x prefix (%#lx); used for addresses. */
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117 struct syscall_arg *arg)
119 return scnprintf(bf, size, "%#lx", arg->val);
122 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Decode the prot argument of mmap() and mprotect(). PROT_NONE prints as
 * NONE; otherwise every known PROT_ bit that is set is appended by name,
 * separated by |, and a final hexadecimal value is appended for what
 * remains (presumably the bits no name matched, TODO confirm).
 * Returns the number of characters written to bf.
 */
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125 struct syscall_arg *arg)
127 int printed = 0, prot = arg->val;
129 if (prot == PROT_NONE)
130 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132 if (prot & PROT_##n) { \
133 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
143 P_MMAP_PROT(GROWSDOWN);
144 P_MMAP_PROT(GROWSUP);
148 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Decode the flags argument of mmap(): every known MAP_ bit that is set is
 * appended by name, separated by |, followed by a hexadecimal value for
 * what remains (presumably the unnamed bits, TODO confirm).
 * MAP_UNINITIALIZED is only decoded when the headers define it.
 * Returns the number of characters written to bf.
 */
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156 struct syscall_arg *arg)
158 int printed = 0, flags = arg->val;
160 #define P_MMAP_FLAG(n) \
161 if (flags & MAP_##n) { \
162 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
167 P_MMAP_FLAG(PRIVATE);
171 P_MMAP_FLAG(ANONYMOUS);
172 P_MMAP_FLAG(DENYWRITE);
173 P_MMAP_FLAG(EXECUTABLE);
176 P_MMAP_FLAG(GROWSDOWN);
178 P_MMAP_FLAG(HUGETLB);
181 P_MMAP_FLAG(NONBLOCK);
182 P_MMAP_FLAG(NORESERVE);
183 P_MMAP_FLAG(POPULATE);
185 #ifdef MAP_UNINITIALIZED
186 P_MMAP_FLAG(UNINITIALIZED);
191 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Decode the behavior argument of madvise(). Each P_MADV_BHV(n) expands to
 * a case label for MADV_<n> that returns the name; values without a case
 * are printed in hexadecimal. Constants that may be absent from the headers
 * are wrapped in #ifdef.
 */
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199 struct syscall_arg *arg)
201 int behavior = arg->val;
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
207 P_MADV_BHV(SEQUENTIAL);
208 P_MADV_BHV(WILLNEED);
209 P_MADV_BHV(DONTNEED);
211 P_MADV_BHV(DONTFORK);
213 P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215 P_MADV_BHV(SOFT_OFFLINE);
217 P_MADV_BHV(MERGEABLE);
218 P_MADV_BHV(UNMERGEABLE);
220 P_MADV_BHV(HUGEPAGE);
222 #ifdef MADV_NOHUGEPAGE
223 P_MADV_BHV(NOHUGEPAGE);
226 P_MADV_BHV(DONTDUMP);
235 return scnprintf(bf, size, "%#x", behavior);
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Decode the operation argument of flock(). One case prints NONE
 * (presumably a zero operation, TODO confirm). Otherwise each LOCK_ command
 * whose bits are all present in op is appended by name, separated by |,
 * followed by a hexadecimal value for what remains.
 * Returns the number of characters written to bf.
 */
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241 struct syscall_arg *arg)
243 int printed = 0, op = arg->val;
246 return scnprintf(bf, size, "NONE");
248 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
264 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Decode the op argument of futex(). The command part (op masked with
 * FUTEX_CMD_MASK) is printed by name, or in hexadecimal when unknown, and
 * the FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME modifiers are appended as
 * |PRIV and |CLKRT.
 *
 * Each command also ORs SCF_ bits into arg->mask. The SCF_ values are one
 * bit per futex argument position, so this presumably tells the caller
 * which arguments the command does not use and should not be printed
 * (TODO confirm against the code consuming arg->mask).
 */
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
273 enum syscall_futex_args {
274 SCF_UADDR = (1 << 0),
277 SCF_TIMEOUT = (1 << 3),
278 SCF_UADDR2 = (1 << 4),
282 int cmd = op & FUTEX_CMD_MASK;
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
288 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
291 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
292 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
293 P_FUTEX_OP(WAKE_OP); break;
294 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
297 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
298 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
299 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
300 default: printed = scnprintf(bf, size, "%#x", cmd); break;
303 if (op & FUTEX_PRIVATE_FLAG)
304 printed += scnprintf(bf + printed, size - printed, "|PRIV");
306 if (op & FUTEX_CLOCK_REALTIME)
307 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
312 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables for integer syscall arguments. Each one is wrapped in a
 * struct strarray and indexed by the argument value minus the table offset.
 */
/* epoll_ctl op names; declared with offset 1, so the first entry stands for value 1. */
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Timer selectors, used for the first argument of getitimer and setitimer. */
317 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
318 static DEFINE_STRARRAY(itimers);
/* lseek whence values. */
320 static const char *whences[] = { "SET", "CUR", "END",
328 static DEFINE_STRARRAY(whences);
/* fcntl command names. */
330 static const char *fcntl_cmds[] = {
331 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
332 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
333 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
336 static DEFINE_STRARRAY(fcntl_cmds);
/* Resource names for getrlimit, setrlimit and prlimit64. */
338 static const char *rlimit_resources[] = {
339 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
340 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
343 static DEFINE_STRARRAY(rlimit_resources);
/* The how argument of rt_sigprocmask. */
345 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
346 static DEFINE_STRARRAY(sighow);
/* Clock ids for clock_gettime. */
348 static const char *clockid[] = {
349 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
350 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
352 static DEFINE_STRARRAY(clockid);
/* Address family names for the first argument of socket and socketpair. */
354 static const char *socket_families[] = {
355 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
356 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
357 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
358 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
359 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
360 "ALG", "NFC", "VSOCK",
362 static DEFINE_STRARRAY(socket_families);
/*
 * Decode the type argument of socket() and socketpair(). The low bits
 * selected by SOCK_TYPE_MASK (0xf when the headers do not define it) are
 * the socket type, printed by name or in hexadecimal when unknown. The
 * remaining bits are flags: each known SOCK_ flag is appended as |NAME and
 * cleared, and a trailing |hex value is appended for flags left over.
 */
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369 struct syscall_arg *arg)
373 flags = type & ~SOCK_TYPE_MASK;
375 type &= SOCK_TYPE_MASK;
377 * Can't use a strarray, MIPS may override for ABI reasons.
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
385 P_SK_TYPE(SEQPACKET);
390 printed = scnprintf(bf, size, "%#x", type);
393 #define P_SK_FLAG(n) \
394 if (flags & SOCK_##n) { \
395 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396 flags &= ~SOCK_##n; \
404 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Decode the flags argument of the send and recv family of syscalls. The
 * defines in front of the function supply MSG_ constants that the system
 * headers may not provide. One case prints NONE (presumably zero flags,
 * TODO confirm). Otherwise every known MSG_ bit that is set is appended by
 * name, separated by |, followed by a hexadecimal value for what remains.
 */
412 #define MSG_PROBE 0x10
414 #ifndef MSG_WAITFORONE
415 #define MSG_WAITFORONE 0x10000
417 #ifndef MSG_SENDPAGE_NOTLAST
418 #define MSG_SENDPAGE_NOTLAST 0x20000
421 #define MSG_FASTOPEN 0x20000000
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425 struct syscall_arg *arg)
427 int printed = 0, flags = arg->val;
430 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432 if (flags & MSG_##n) { \
433 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
439 P_MSG_FLAG(DONTROUTE);
444 P_MSG_FLAG(DONTWAIT);
451 P_MSG_FLAG(ERRQUEUE);
452 P_MSG_FLAG(NOSIGNAL);
454 P_MSG_FLAG(WAITFORONE);
455 P_MSG_FLAG(SENDPAGE_NOTLAST);
456 P_MSG_FLAG(FASTOPEN);
457 P_MSG_FLAG(CMSG_CLOEXEC);
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Decode the mode argument of access(). F_OK (zero) prints as F. Otherwise
 * the letter of every <n>_OK bit that is set is appended with no separator
 * between letters, and a trailing |hex value is appended for what remains.
 */
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469 struct syscall_arg *arg)
474 if (mode == F_OK) /* 0 */
475 return scnprintf(bf, size, "F");
477 if (mode & n##_OK) { \
478 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
488 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Decode the flags argument of open(), openat() and open_by_handle_at().
 * When O_CREAT is absent, the bit for the following argument (the mode) is
 * set in arg->mask. One case prints RDONLY (presumably flags equal to
 * zero, TODO confirm). Otherwise every known O_ bit that is set is appended
 * by name, separated by |, followed by a hexadecimal value for what remains.
 * O_SYNC is matched with a full equality test rather than a plain bit test,
 * presumably because it spans more than one bit (TODO confirm).
 */
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496 struct syscall_arg *arg)
498 int printed = 0, flags = arg->val;
500 if (!(flags & O_CREAT))
501 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
504 return scnprintf(bf, size, "RDONLY");
506 if (flags & O_##n) { \
507 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
531 if ((flags & O_SYNC) == O_SYNC)
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Decode the flags argument of eventfd2(). One case prints NONE (presumably
 * zero flags, TODO confirm). Otherwise every known EFD_ bit that is set is
 * appended by name, separated by |, followed by a hexadecimal value for
 * what remains.
 */
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552 struct syscall_arg *arg)
554 int printed = 0, flags = arg->val;
557 return scnprintf(bf, size, "NONE");
559 if (flags & EFD_##n) { \
560 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Decode the flags argument of pipe2(): every known O_ bit that is set is
 * appended by name, separated by |, followed by a hexadecimal value for
 * what remains.
 */
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578 struct syscall_arg *arg)
580 int printed = 0, flags = arg->val;
583 if (flags & O_##n) { \
584 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
593 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Print a signal number by name. Each P_SIGNUM(n) expands to a case label
 * for SIG<n> that returns the name without the SIG prefix; numbers without
 * a case are printed in hexadecimal.
 */
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
640 return scnprintf(bf, size, "%#x", sig);
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of the terminal ioctl commands, used for the cmd argument of
 * ioctl(). The strarray is declared with offset 0x5401 (the TCGETS value),
 * so entry 0 stands for command 0x5401. The designated initializers
 * ([0x27], [0x50], [0x60]) jump over unassigned command numbers, which
 * leaves NULL holes in the table.
 */
645 #define TCGETS 0x5401
647 static const char *tioctls[] = {
648 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
649 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
650 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
651 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
652 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
653 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
654 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
655 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
656 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
657 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
658 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
659 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
660 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
661 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
662 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
665 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
/*
 * Shorthand for a syscall_fmt entry whose argument number arg is decoded
 * through strarray__<array> with the decimal strarray formatter. The name
 * parameter does not appear in the expansion; it only labels the argument
 * at the point of use.
 */
667 #define STRARRAY(arg, name, array) \
668 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
669 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting overrides, keyed by syscall name. In each entry
 * arg_scnprintf[i] is the formatter for argument i (six slots) and
 * arg_parm[i] its optional extra parameter, for example a strarray. The
 * alias is the name tried for the sys_enter_ tracepoint when the lookup by
 * the primary name fails.
 *
 * syscall_fmt__find() searches these entries with bsearch() and strcmp(),
 * so they must stay sorted by name in strcmp() order.
 *
 * NOTE(review): preadv uses pread as its alias while pread itself aliases
 * to pread64; confirm the preadv alias is intended.
 * NOTE(review): the mlockall argument is labelled addr, but mlockall(2)
 * takes a flags argument; confirm the label and the formatter.
 */
671 static struct syscall_fmt {
674 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
680 { .name = "access", .errmsg = true,
681 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
683 { .name = "brk", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
686 { .name = "close", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
688 { .name = "connect", .errmsg = true, },
689 { .name = "dup", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
691 { .name = "dup2", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
693 { .name = "dup3", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
695 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696 { .name = "eventfd2", .errmsg = true,
697 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698 { .name = "faccessat", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 { .name = "fadvise64", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
702 { .name = "fallocate", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
704 { .name = "fchdir", .errmsg = true,
705 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
706 { .name = "fchmod", .errmsg = true,
707 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
708 { .name = "fchmodat", .errmsg = true,
709 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
710 { .name = "fchown", .errmsg = true,
711 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
712 { .name = "fchownat", .errmsg = true,
713 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
714 { .name = "fcntl", .errmsg = true,
715 .arg_scnprintf = { [0] = SCA_FD, /* fd */
716 [1] = SCA_STRARRAY, /* cmd */ },
717 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718 { .name = "fdatasync", .errmsg = true,
719 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
720 { .name = "flock", .errmsg = true,
721 .arg_scnprintf = { [0] = SCA_FD, /* fd */
722 [1] = SCA_FLOCK, /* cmd */ }, },
723 { .name = "fsetxattr", .errmsg = true,
724 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
725 { .name = "fstat", .errmsg = true, .alias = "newfstat",
726 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
727 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
728 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
729 { .name = "fstatfs", .errmsg = true,
730 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
731 { .name = "fsync", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
733 { .name = "ftruncate", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
735 { .name = "futex", .errmsg = true,
736 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737 { .name = "futimesat", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
739 { .name = "getdents", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
741 { .name = "getdents64", .errmsg = true,
742 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
743 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
744 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745 { .name = "ioctl", .errmsg = true,
746 .arg_scnprintf = { [0] = SCA_FD, /* fd */
747 [1] = SCA_STRHEXARRAY, /* cmd */
748 [2] = SCA_HEX, /* arg */ },
749 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
750 { .name = "kill", .errmsg = true,
751 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752 { .name = "linkat", .errmsg = true,
753 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
754 { .name = "lseek", .errmsg = true,
755 .arg_scnprintf = { [0] = SCA_FD, /* fd */
756 [2] = SCA_STRARRAY, /* whence */ },
757 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
758 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
759 { .name = "madvise", .errmsg = true,
760 .arg_scnprintf = { [0] = SCA_HEX, /* start */
761 [2] = SCA_MADV_BHV, /* behavior */ }, },
762 { .name = "mkdirat", .errmsg = true,
763 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
764 { .name = "mknodat", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
766 { .name = "mlock", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768 { .name = "mlockall", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770 { .name = "mmap", .hexret = true,
771 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
772 [2] = SCA_MMAP_PROT, /* prot */
773 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774 { .name = "mprotect", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_HEX, /* start */
776 [2] = SCA_MMAP_PROT, /* prot */ }, },
777 { .name = "mremap", .hexret = true,
778 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779 [4] = SCA_HEX, /* new_addr */ }, },
780 { .name = "munlock", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782 { .name = "munmap", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784 { .name = "name_to_handle_at", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "newfstatat", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "open", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790 { .name = "open_by_handle_at", .errmsg = true,
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793 { .name = "openat", .errmsg = true,
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796 { .name = "pipe2", .errmsg = true,
797 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798 { .name = "poll", .errmsg = true, .timeout = true, },
799 { .name = "ppoll", .errmsg = true, .timeout = true, },
800 { .name = "pread", .errmsg = true, .alias = "pread64",
801 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
802 { .name = "preadv", .errmsg = true, .alias = "pread",
803 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
804 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
806 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
807 { .name = "pwritev", .errmsg = true,
808 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
809 { .name = "read", .errmsg = true,
810 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
811 { .name = "readlinkat", .errmsg = true,
812 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
813 { .name = "readv", .errmsg = true,
814 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
815 { .name = "recvfrom", .errmsg = true,
816 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817 { .name = "recvmmsg", .errmsg = true,
818 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819 { .name = "recvmsg", .errmsg = true,
820 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821 { .name = "renameat", .errmsg = true,
822 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
823 { .name = "rt_sigaction", .errmsg = true,
824 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
826 { .name = "rt_sigqueueinfo", .errmsg = true,
827 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828 { .name = "rt_tgsigqueueinfo", .errmsg = true,
829 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830 { .name = "select", .errmsg = true, .timeout = true, },
831 { .name = "sendmmsg", .errmsg = true,
832 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833 { .name = "sendmsg", .errmsg = true,
834 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835 { .name = "sendto", .errmsg = true,
836 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
838 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839 { .name = "shutdown", .errmsg = true,
840 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
841 { .name = "socket", .errmsg = true,
842 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843 [1] = SCA_SK_TYPE, /* type */ },
844 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
845 { .name = "socketpair", .errmsg = true,
846 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847 [1] = SCA_SK_TYPE, /* type */ },
848 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
849 { .name = "stat", .errmsg = true, .alias = "newstat", },
850 { .name = "symlinkat", .errmsg = true,
851 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
852 { .name = "tgkill", .errmsg = true,
853 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854 { .name = "tkill", .errmsg = true,
855 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856 { .name = "uname", .errmsg = true, .alias = "newuname", },
857 { .name = "unlinkat", .errmsg = true,
858 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859 { .name = "utimensat", .errmsg = true,
860 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861 { .name = "write", .errmsg = true,
862 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863 { .name = "writev", .errmsg = true,
864 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparison callback: name is the key string being looked up and
 * fmtp points at the table element it is compared against.
 */
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
869 const struct syscall_fmt *fmt = fmtp;
870 return strcmp(name, fmt->name);
/*
 * Binary search syscall_fmts for the entry called name. Returns the entry,
 * or NULL when bsearch() finds no match. Relies on the table being sorted
 * by name.
 */
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
875 const int nmemb = ARRAY_SIZE(syscall_fmts);
876 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Per-syscall state (presumably members of struct syscall, TODO confirm):
 * tp_format is the tracepoint format obtained from event_format__new(),
 * fmt the optional syscall_fmts entry found by syscall_fmt__find(), and
 * arg_scnprintf the per-argument formatter array that
 * syscall__set_arg_fmts() allocates.
 */
880 struct event_format *tp_format;
883 struct syscall_fmt *fmt;
884 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration between parentheses, followed by a colon and a space.
 * t is divided by NSEC_PER_MSEC and shown in milliseconds with three
 * decimals. The value is colour coded: red for the slowest calls (above a
 * higher threshold), yellow from 0.01 ms up, normal colour below that.
 * Returns the number of characters printed.
 */
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
890 double duration = (double)t / NSEC_PER_MSEC;
891 size_t printed = fprintf(fp, "(");
894 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895 else if (duration >= 0.01)
896 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
898 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, kept in thread->priv. syscall_stats is an
 * intlist keyed by syscall id whose node priv pointers hold the struct
 * stats filled in by thread__update_stats().
 */
902 struct thread_trace {
906 unsigned long nr_events;
914 struct intlist *syscall_stats;
/*
 * Allocate a zeroed thread_trace. paths.max starts at -1, the value that
 * trace__set_fd_pathname() takes to mean that no fd path table exists yet,
 * and syscall_stats starts as an empty intlist.
 */
917 static struct thread_trace *thread_trace__new(void)
919 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
922 ttrace->paths.max = -1;
924 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get the thread_trace attached to thread, allocating it on first use and
 * caching it in thread->priv. When no state can be provided a red warning
 * about dropped samples is written to fp.
 */
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
931 struct thread_trace *ttrace;
936 if (thread->priv == NULL)
937 thread->priv = thread_trace__new();
939 if (thread->priv == NULL)
942 ttrace = thread->priv;
947 color_fprintf(fp, PERF_COLOR_RED,
948 "WARNING: not enough memory, dropping samples!\n");
/*
 * Session-wide state (presumably members of struct trace, TODO confirm).
 * The perf_tool is embedded so that tool callbacks can get back to the
 * enclosing structure with container_of(). table holds the per-syscall
 * descriptors indexed by syscall id, and ev_qualifier together with
 * not_ev_qualifier select which syscalls are filtered.
 */
953 struct perf_tool tool;
960 struct syscall *table;
962 struct perf_record_opts opts;
963 struct machine *host;
967 unsigned long nr_events;
968 struct strlist *ev_qualifier;
969 bool not_ev_qualifier;
971 const char *last_vfs_getname;
972 struct intlist *tid_list;
973 struct intlist *pid_list;
975 bool multiple_threads;
978 bool show_tool_stats;
979 double duration_filter;
982 u64 vfs_getname, proc_getname;
/*
 * Record pathname as the path behind fd for this thread. The per-thread
 * table is grown with realloc() to fd + 1 slots when fd is beyond
 * paths.max, and the newly added slots are zeroed (the whole table when it
 * did not exist before). The path is stored as a strdup() copy.
 * Returns 0 on success, -1 when the copy could not be allocated.
 *
 * NOTE(review): the slot is overwritten without freeing a previous value;
 * confirm callers only store into empty slots, otherwise the old string
 * leaks.
 */
986 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
988 struct thread_trace *ttrace = thread->priv;
990 if (fd > ttrace->paths.max) {
991 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
996 if (ttrace->paths.max != -1) {
997 memset(npath + ttrace->paths.max + 1, 0,
998 (fd - ttrace->paths.max) * sizeof(char *));
1000 memset(npath, 0, (fd + 1) * sizeof(char *));
1003 ttrace->paths.table = npath;
1004 ttrace->paths.max = fd;
1007 ttrace->paths.table[fd] = strdup(pathname);
1009 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve fd to a path by reading the matching /proc symlink and store the
 * result with trace__set_fd_pathname(). The link is /proc/<pid>/fd/<fd>
 * when the thread is the group leader (pid equal to tid) and
 * /proc/<pid>/task/<tid>/fd/<fd> otherwise. lstat() is used first so that
 * a target too long for the local buffer is rejected, and the readlink()
 * result is rejected when it is an error or longer than the size lstat()
 * reported. readlink() does not terminate the string, hence the explicit
 * NUL store.
 */
1012 static int thread__read_fd_path(struct thread *thread, int fd)
1014 char linkname[PATH_MAX], pathname[PATH_MAX];
1018 if (thread->pid_ == thread->tid) {
1019 scnprintf(linkname, sizeof(linkname),
1020 "/proc/%d/fd/%d", thread->pid_, fd);
1022 scnprintf(linkname, sizeof(linkname),
1023 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1026 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1029 ret = readlink(linkname, pathname, sizeof(pathname));
1031 if (ret < 0 || ret > st.st_size)
1034 pathname[ret] = '\0';
1035 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for fd. On a cache miss (fd beyond paths.max or an
 * empty slot) the path is read from /proc through thread__read_fd_path(),
 * and the trace->stats.proc_getname counter is bumped for that lookup.
 */
1038 static const char *thread__fd_path(struct thread *thread, int fd,
1039 struct trace *trace)
1041 struct thread_trace *ttrace = thread->priv;
1049 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1052 ++trace->stats.proc_getname;
1053 if (thread__read_fd_path(thread, fd)) {
1057 return ttrace->paths.table[fd];
/*
 * Print a file descriptor argument as a decimal number, followed by the
 * path it refers to between angle brackets (presumably only when
 * thread__fd_path() found one, TODO confirm).
 */
1060 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1061 struct syscall_arg *arg)
1064 size_t printed = scnprintf(bf, size, "%d", fd);
1065 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1068 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Formatter for the fd argument of close(): prints it like any other fd,
 * then frees and clears the cached path for that fd so that a later use of
 * the same number is resolved afresh.
 */
1073 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1074 struct syscall_arg *arg)
1077 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1078 struct thread_trace *ttrace = arg->thread->priv;
1080 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1081 free(ttrace->paths.table[fd]);
1082 ttrace->paths.table[fd] = NULL;
/*
 * Return true when t is below the configured duration filter. The filter is
 * multiplied by NSEC_PER_MSEC before the comparison, so duration_filter is
 * in milliseconds and t in nanoseconds.
 */
1088 static bool trace__filter_duration(struct trace *trace, double t)
1090 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print tstamp relative to trace->base_time, converted to milliseconds and
 * shown with three decimals in a ten character wide field.
 */
1093 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1095 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1097 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared with the signal handler. interrupted records whether the
 * signal received was SIGINT.
 *
 * NOTE(review): both flags are plain bool while being written from a signal
 * handler; confirm whether volatile sig_atomic_t is needed here.
 */
1100 static bool done = false;
1101 static bool interrupted = false;
1103 static void sig_handler(int sig)
1106 interrupted = sig == SIGINT;
/*
 * Print the common prefix of an output line: the relative timestamp, the
 * duration and, when several threads are being traced, the thread id,
 * preceded by up to 14 characters of the command name if show_comm is set.
 */
1109 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1110 u64 duration, u64 tstamp, FILE *fp)
1112 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1113 printed += fprintf_duration(duration, fp);
1115 if (trace->multiple_threads) {
1116 if (trace->show_comm)
1117 printed += fprintf(fp, "%.14s/", thread->comm);
1118 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Feed a non-sample event to the machine state. PERF_RECORD_LOST events are
 * additionally reported in red on trace->output with the number of lost
 * events; every other type goes straight to machine__process_event().
 */
1124 static int trace__process_event(struct trace *trace, struct machine *machine,
1125 union perf_event *event)
1129 switch (event->header.type) {
1130 case PERF_RECORD_LOST:
1131 color_fprintf(trace->output, PERF_COLOR_RED,
1132 "LOST %" PRIu64 " events!\n", event->lost.lost);
1133 ret = machine__process_lost_event(machine, event);
1135 ret = machine__process_event(machine, event);
/*
 * perf_tool callback adapter: recovers the enclosing struct trace from the
 * embedded tool and forwards the event to trace__process_event(). The
 * sample is not used.
 */
1142 static int trace__tool_process(struct perf_tool *tool,
1143 union perf_event *event,
1144 struct perf_sample *sample __maybe_unused,
1145 struct machine *machine)
1147 struct trace *trace = container_of(tool, struct trace, tool);
1148 return trace__process_event(trace, machine, event);
/*
 * Initialise the symbol subsystem and the host machine, then synthesize
 * events for already existing threads: only those in the evlist thread map
 * when the target names specific tasks, all threads otherwise. The
 * synthesized events are delivered through trace__tool_process().
 */
1151 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1153 int err = symbol__init();
1158 trace->host = machine__new_host();
1159 if (trace->host == NULL)
1162 if (perf_target__has_task(&trace->opts.target)) {
1163 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1164 trace__tool_process,
1167 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Build the per-argument formatter array for sc. One slot is allocated per
 * tracepoint field except the first, which the loop also skips by starting
 * at fields->next. For each remaining field the formatter from the
 * syscall_fmts entry is used when there is one; otherwise pointer fields
 * fall back to the hexadecimal formatter.
 */
1177 static int syscall__set_arg_fmts(struct syscall *sc)
1179 struct format_field *field;
1182 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1183 if (sc->arg_scnprintf == NULL)
1187 sc->arg_parm = sc->fmt->arg_parm;
1189 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1190 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1191 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1192 else if (field->flags & FIELD_IS_POINTER)
1193 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate trace->syscalls.table[id]. The syscall name comes from
 * audit_syscall_to_name(); the table is grown with realloc() and its new
 * slots zeroed when id is beyond syscalls.max. With an ev_qualifier list in
 * place the syscall is marked filtered when its membership in the list
 * equals not_ev_qualifier, that is when it is listed and the list is
 * negated, or not listed and the list is not negated. Afterwards the
 * optional syscall_fmts entry is looked up and the sys_enter_<name>
 * tracepoint format loaded, retrying with the alias from the fmt entry when
 * the first lookup fails. Ends by setting up the per-argument formatters
 * with syscall__set_arg_fmts().
 */
1200 static int trace__read_syscall_info(struct trace *trace, int id)
1204 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1209 if (id > trace->syscalls.max) {
1210 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1212 if (nsyscalls == NULL)
1215 if (trace->syscalls.max != -1) {
1216 memset(nsyscalls + trace->syscalls.max + 1, 0,
1217 (id - trace->syscalls.max) * sizeof(*sc));
1219 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1222 trace->syscalls.table = nsyscalls;
1223 trace->syscalls.max = id;
1226 sc = trace->syscalls.table + id;
1229 if (trace->ev_qualifier) {
1230 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1232 if (!(in ^ trace->not_ev_qualifier)) {
1233 sc->filtered = true;
1235 * No need to do read tracepoint information since this will be
1242 sc->fmt = syscall_fmt__find(sc->name);
1244 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1245 sc->tp_format = event_format__new("syscalls", tp_name);
1247 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1248 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1249 sc->tp_format = event_format__new("syscalls", tp_name);
1252 if (sc->tp_format == NULL)
1255 return syscall__set_arg_fmts(sc);
/*
 * Format the syscall arguments found in args into bf. When a tracepoint
 * format is available the fields are walked from the second one on and each
 * argument is printed as name: value, separated by commas. A registered
 * per-argument formatter is used when there is one (it receives the value
 * and the matching arg_parm entry), plain %ld otherwise. Zero valued
 * arguments are candidates for suppression unless a strarray formatter with
 * a table is attached, since zero may then map to a name. Without a
 * tracepoint format the raw values are presumably printed by position
 * (that branch is only partly visible, TODO confirm).
 */
1258 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1259 unsigned long *args, struct trace *trace,
1260 struct thread *thread)
1264 if (sc->tp_format != NULL) {
1265 struct format_field *field;
1267 struct syscall_arg arg = {
1274 for (field = sc->tp_format->format.fields->next; field;
1275 field = field->next, ++arg.idx, bit <<= 1) {
1279 * Suppress this argument if its value is zero and
1280 * and we don't have a string associated in an
1283 if (args[arg.idx] == 0 &&
1284 !(sc->arg_scnprintf &&
1285 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1286 sc->arg_parm[arg.idx]))
1289 printed += scnprintf(bf + printed, size - printed,
1290 "%s%s: ", printed ? ", " : "", field->name);
1291 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1292 arg.val = args[arg.idx];
1294 arg.parm = sc->arg_parm[arg.idx];
1295 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1296 size - printed, &arg);
1298 printed += scnprintf(bf + printed, size - printed,
1299 "%ld", args[arg.idx]);
1306 printed += scnprintf(bf + printed, size - printed,
1308 printed ? ", " : "", i, args[i]);
/* Signature shared by the per-tracepoint sample handlers. */
1316 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1317 struct perf_sample *sample);
/*
 * Return the table entry for syscall id, reading its information with
 * trace__read_syscall_info() the first time the id is seen. An invalid id
 * is reported on trace->output together with the event name and a running
 * count. When the information cannot be obtained, a message naming the id,
 * and the syscall name when it is known, is written to trace->output.
 */
1319 static struct syscall *trace__syscall_info(struct trace *trace,
1320 struct perf_evsel *evsel, int id)
1326 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1327 * before that, leaving at a higher verbosity level till that is
1328 * explained. Reproduced with plain ftrace with:
1330 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1331 * grep "NR -1 " /t/trace_pipe
1333 * After generating some load on the machine.
1337 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1338 id, perf_evsel__name(evsel), ++n);
1343 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1344 trace__read_syscall_info(trace, id))
1347 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1350 return &trace->syscalls.table[id];
1354 fprintf(trace->output, "Problems reading syscall %d", id);
1355 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1356 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1357 fputs(" information\n", trace->output);
1362 static void thread__update_stats(struct thread_trace *ttrace,
1363 int id, struct perf_sample *sample)
1365 struct int_node *inode;
1366 struct stats *stats;
1369 inode = intlist__findnew(ttrace->syscall_stats, id);
1373 stats = inode->priv;
1374 if (stats == NULL) {
1375 stats = malloc(sizeof(struct stats));
1379 inode->priv = stats;
1382 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1383 duration = sample->time - ttrace->entry_time;
1385 update_stats(stats, duration);
1388 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1389 struct perf_sample *sample)
1394 struct thread *thread;
1395 int id = perf_evsel__intval(evsel, sample, "id");
1396 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1397 struct thread_trace *ttrace;
1405 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1406 ttrace = thread__trace(thread, trace->output);
1410 args = perf_evsel__rawptr(evsel, sample, "args");
1412 fprintf(trace->output, "Problems reading syscall arguments\n");
1416 ttrace = thread->priv;
1418 if (ttrace->entry_str == NULL) {
1419 ttrace->entry_str = malloc(1024);
1420 if (!ttrace->entry_str)
1424 ttrace->entry_time = sample->time;
1425 msg = ttrace->entry_str;
1426 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1428 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1429 args, trace, thread);
1431 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1432 if (!trace->duration_filter) {
1433 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1434 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1437 ttrace->entry_pending = true;
1442 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1443 struct perf_sample *sample)
1447 struct thread *thread;
1448 int id = perf_evsel__intval(evsel, sample, "id");
1449 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1450 struct thread_trace *ttrace;
1458 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1459 ttrace = thread__trace(thread, trace->output);
1464 thread__update_stats(ttrace, id, sample);
1466 ret = perf_evsel__intval(evsel, sample, "ret");
1468 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1469 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1470 trace->last_vfs_getname = NULL;
1471 ++trace->stats.vfs_getname;
1474 ttrace = thread->priv;
1476 ttrace->exit_time = sample->time;
1478 if (ttrace->entry_time) {
1479 duration = sample->time - ttrace->entry_time;
1480 if (trace__filter_duration(trace, duration))
1482 } else if (trace->duration_filter)
1485 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1487 if (ttrace->entry_pending) {
1488 fprintf(trace->output, "%-70s", ttrace->entry_str);
1490 fprintf(trace->output, " ... [");
1491 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1492 fprintf(trace->output, "]: %s()", sc->name);
1495 if (sc->fmt == NULL) {
1497 fprintf(trace->output, ") = %d", ret);
1498 } else if (ret < 0 && sc->fmt->errmsg) {
1500 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1501 *e = audit_errno_to_name(-ret);
1503 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1504 } else if (ret == 0 && sc->fmt->timeout)
1505 fprintf(trace->output, ") = 0 Timeout");
1506 else if (sc->fmt->hexret)
1507 fprintf(trace->output, ") = %#x", ret);
1511 fputc('\n', trace->output);
1513 ttrace->entry_pending = false;
1518 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1519 struct perf_sample *sample)
1521 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1525 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1526 struct perf_sample *sample)
1528 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1529 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1530 struct thread *thread = machine__findnew_thread(trace->host,
1533 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1538 ttrace->runtime_ms += runtime_ms;
1539 trace->runtime_ms += runtime_ms;
1543 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1545 perf_evsel__strval(evsel, sample, "comm"),
1546 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1548 perf_evsel__intval(evsel, sample, "vruntime"));
1552 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1554 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1555 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1558 if (trace->pid_list || trace->tid_list)
1564 static int trace__process_sample(struct perf_tool *tool,
1565 union perf_event *event __maybe_unused,
1566 struct perf_sample *sample,
1567 struct perf_evsel *evsel,
1568 struct machine *machine __maybe_unused)
1570 struct trace *trace = container_of(tool, struct trace, tool);
1573 tracepoint_handler handler = evsel->handler.func;
1575 if (skip_sample(trace, sample))
1578 if (!trace->full_time && trace->base_time == 0)
1579 trace->base_time = sample->time;
1582 handler(trace, evsel, sample);
1588 perf_session__has_tp(struct perf_session *session, const char *name)
1590 struct perf_evsel *evsel;
1592 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1594 return evsel != NULL;
1597 static int parse_target_str(struct trace *trace)
1599 if (trace->opts.target.pid) {
1600 trace->pid_list = intlist__new(trace->opts.target.pid);
1601 if (trace->pid_list == NULL) {
1602 pr_err("Error parsing process id string\n");
1607 if (trace->opts.target.tid) {
1608 trace->tid_list = intlist__new(trace->opts.target.tid);
1609 if (trace->tid_list == NULL) {
1610 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build a "perf record" command line that
 * selects the raw_syscalls enter/exit tracepoints, append the caller's
 * arguments and hand everything to cmd_record().
 *
 * Returns cmd_record()'s result, or -ENOMEM when the argument vector
 * cannot be allocated.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int rec_argc = nr_fixed + argc;
	const char **rec_argv;
	unsigned int pos;

	/* One spare, zeroed slot keeps the vector NULL terminated. */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	for (pos = 0; pos < nr_fixed; pos++)
		rec_argv[pos] = record_args[pos];

	for (; pos < rec_argc; pos++)
		rec_argv[pos] = argv[pos - nr_fixed];

	return cmd_record(pos, rec_argv, NULL);
}
1645 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1647 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1649 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1650 evlist->nr_entries);
1654 if (perf_evsel__field(evsel, "pathname") == NULL) {
1655 perf_evsel__delete(evsel);
1659 evsel->handler.func = trace__vfs_getname;
1660 perf_evlist__add(evlist, evsel);
/*
 * Live tracing: set up an event list with the syscall tracepoints,
 * optionally prepare and start the workload given in argv, then read and
 * dispatch events from the mmap'ed buffers, and finally print the
 * summaries and release the event list.
 */
1663 static int trace__run(struct trace *trace, int argc, const char **argv)
1665 struct perf_evlist *evlist = perf_evlist__new();
1666 struct perf_evsel *evsel;
1668 unsigned long before;
/* A command line was passed: a workload will be prepared and started. */
1669 const bool forks = argc > 0;
1673 if (evlist == NULL) {
1674 fprintf(trace->output, "Not enough memory to run!\n");
/* Hook the raw syscall enter/exit tracepoints to their handlers. */
1678 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1679 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
/* Optional probe:vfs_getname event, see perf_evlist__add_vfs_getname(). */
1682 perf_evlist__add_vfs_getname(evlist);
1685 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1686 trace__sched_stat_runtime))
1689 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1691 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1692 goto out_delete_evlist;
1695 err = trace__symbols_init(trace, evlist);
1697 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1698 goto out_delete_maps;
1701 perf_evlist__config(evlist, &trace->opts);
/* Both SIGCHLD and SIGINT are routed to the same sig_handler(). */
1703 signal(SIGCHLD, sig_handler);
1704 signal(SIGINT, sig_handler);
1707 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1708 argv, false, false);
1710 fprintf(trace->output, "Couldn't run the workload!\n");
1711 goto out_delete_maps;
1715 err = perf_evlist__open(evlist);
1717 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1718 goto out_delete_maps;
1721 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1723 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1724 goto out_close_evlist;
1727 perf_evlist__enable(evlist);
1730 perf_evlist__start_workload(evlist);
/* Several threads are traced when the first map entry is -1 or there is more than one. */
1732 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot of the event counter, compared below to detect an idle pass. */
1734 before = trace->nr_events;
/* Drain every mmap'ed buffer in turn. */
1736 for (i = 0; i < evlist->nr_mmaps; i++) {
1737 union perf_event *event;
1739 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1740 const u32 type = event->header.type;
1741 tracepoint_handler handler;
1742 struct perf_sample sample;
1746 err = perf_evlist__parse_sample(evlist, event, &sample);
1748 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
/* With relative timestamps the first sample defines time zero. */
1752 if (!trace->full_time && trace->base_time == 0)
1753 trace->base_time = sample.time;
/* Everything that is not a sample goes to trace__process_event(). */
1755 if (type != PERF_RECORD_SAMPLE) {
1756 trace__process_event(trace, trace->host, event);
/* Map the sample id back to the evsel that carries its handler. */
1760 evsel = perf_evlist__id2evsel(evlist, sample.id);
1761 if (evsel == NULL) {
1762 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1766 if (sample.raw_data == NULL) {
1767 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1768 perf_evsel__name(evsel), sample.tid,
1769 sample.cpu, sample.raw_size);
1773 handler = evsel->handler.func;
1774 handler(trace, evsel, &sample);
/*
 * No event was consumed in this pass: wait on the event file
 * descriptors, forever while running, 100 once 'done' is set.
 */
1781 if (trace->nr_events == before) {
1782 int timeout = done ? 100 : -1;
1784 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1791 perf_evlist__disable(evlist);
1795 trace__fprintf_thread_summary(trace, trace->output);
1797 if (trace->show_tool_stats) {
1798 fprintf(trace->output, "Stats:\n "
1799 " vfs_getname : %" PRIu64 "\n"
1800 " proc_getname: %" PRIu64 "\n",
1801 trace->stats.vfs_getname,
1802 trace->stats.proc_getname);
/* Teardown, in reverse order of the setup above. */
1806 perf_evlist__munmap(evlist);
1808 perf_evlist__close(evlist);
1810 perf_evlist__delete_maps(evlist);
1812 perf_evlist__delete(evlist);
1814 trace->live = false;
/* Error reporting for tracepoint setup failures, with hints for the user. */
1819 fputs("Error:\tUnable to find debugfs\n"
1820 "Hint:\tWas your kernel was compiled with debugfs support?\n"
1821 "Hint:\tIs the debugfs filesystem mounted?\n"
1822 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n",
1826 fprintf(trace->output,
1827 "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
1828 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
1829 debugfs_mountpoint, debugfs_mountpoint);
1833 fprintf(trace->output, "Can't trace: %s\n",
1834 strerror_r(errno, bf, sizeof(bf)));
1838 goto out_delete_evlist;
1841 static int trace__replay(struct trace *trace)
1843 const struct perf_evsel_str_handler handlers[] = {
1844 { "raw_syscalls:sys_enter", trace__sys_enter, },
1845 { "raw_syscalls:sys_exit", trace__sys_exit, },
1846 { "probe:vfs_getname", trace__vfs_getname, },
1849 struct perf_session *session;
1852 trace->tool.sample = trace__process_sample;
1853 trace->tool.mmap = perf_event__process_mmap;
1854 trace->tool.mmap2 = perf_event__process_mmap2;
1855 trace->tool.comm = perf_event__process_comm;
1856 trace->tool.exit = perf_event__process_exit;
1857 trace->tool.fork = perf_event__process_fork;
1858 trace->tool.attr = perf_event__process_attr;
1859 trace->tool.tracing_data = perf_event__process_tracing_data;
1860 trace->tool.build_id = perf_event__process_build_id;
1862 trace->tool.ordered_samples = true;
1863 trace->tool.ordering_requires_timestamps = true;
1865 /* add tid to output */
1866 trace->multiple_threads = true;
1868 if (symbol__init() < 0)
1871 session = perf_session__new(input_name, O_RDONLY, 0, false,
1873 if (session == NULL)
1876 trace->host = &session->machines.host;
1878 err = perf_session__set_tracepoints_handlers(session, handlers);
1882 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1883 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1887 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1888 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1892 err = parse_target_str(trace);
1898 err = perf_session__process_events(session, &trace->tool);
1900 pr_err("Failed to process events, error %d", err);
1902 else if (trace->summary)
1903 trace__fprintf_thread_summary(trace, trace->output);
1906 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() results, i.e. the number of
 * characters written when no output error occurs.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	/* One entry per output call, already in final form (no conversions). */
	static const char * const lines[] = {
		"\n _____________________________________________________________________________\n",
		" __) Summary of events (__\n\n",
		" [ task - pid ] [ events ] [ ratio ] [ runtime ]\n",
		" syscall count min max avg stddev\n",
		" msec msec msec %\n",
		" _____________________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		printed += fprintf(fp, "%s", lines[i]);

	return printed;
}
1925 static size_t thread__dump_stats(struct thread_trace *ttrace,
1926 struct trace *trace, FILE *fp)
1928 struct stats *stats;
1931 struct int_node *inode = intlist__first(ttrace->syscall_stats);
1936 printed += fprintf(fp, "\n");
1938 /* each int_node is a syscall */
1940 stats = inode->priv;
1942 double min = (double)(stats->min) / NSEC_PER_MSEC;
1943 double max = (double)(stats->max) / NSEC_PER_MSEC;
1944 double avg = avg_stats(stats);
1946 u64 n = (u64) stats->n;
1948 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1949 avg /= NSEC_PER_MSEC;
1951 sc = &trace->syscalls.table[inode->i];
1952 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
1953 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
1955 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
1958 inode = intlist__next(inode);
1961 printed += fprintf(fp, "\n\n");
1966 /* struct used to pass data to per-thread function */
/*
 * An instance is handed as the opaque argument to the per-thread
 * callback, which reads the output stream and trace from it and
 * accumulates the printed character count into it.
 */
1967 struct summary_data {
/* trace session whose event totals and syscall table the callback uses */
1969 struct trace *trace;
1973 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1975 struct summary_data *data = priv;
1976 FILE *fp = data->fp;
1977 size_t printed = data->printed;
1978 struct trace *trace = data->trace;
1979 struct thread_trace *ttrace = thread->priv;
1986 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1988 color = PERF_COLOR_NORMAL;
1990 color = PERF_COLOR_RED;
1991 else if (ratio > 25.0)
1992 color = PERF_COLOR_GREEN;
1993 else if (ratio > 5.0)
1994 color = PERF_COLOR_YELLOW;
1996 printed += color_fprintf(fp, color, "%20s", thread->comm);
1997 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1998 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1999 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2000 printed += thread__dump_stats(ttrace, trace, fp);
2002 data->printed += printed;
2007 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2009 struct summary_data data = {
2013 data.printed = trace__fprintf_threads_header(fp);
2015 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2017 return data.printed;
2020 static int trace__set_duration(const struct option *opt, const char *str,
2021 int unset __maybe_unused)
2023 struct trace *trace = opt->value;
2025 trace->duration_filter = atof(str);
2029 static int trace__open_output(struct trace *trace, const char *filename)
2033 if (!stat(filename, &st) && st.st_size) {
2034 char oldname[PATH_MAX];
2036 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2038 rename(filename, oldname);
2041 trace->output = fopen(filename, "w");
2043 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of "perf trace".
 *
 * "perf trace record ..." is forwarded to trace__record().  Otherwise the
 * options are parsed, the output file and the event qualifier are set up,
 * the target is validated, and the request is served by trace__replay()
 * or trace__run().
 */
2046 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2048 const char * const trace_usage[] = {
2049 "perf trace [<options>] [<command>]",
2050 "perf trace [<options>] -- <command> [<options>]",
2051 "perf trace record [<options>] [<command>]",
2052 "perf trace record [<options>] -- <command> [<options>]",
/* Default state; the audit machine type is used to look up the id of open(). */
2055 struct trace trace = {
2057 .machine = audit_detect_machine(),
2058 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2068 .user_freq = UINT_MAX,
2069 .user_interval = ULLONG_MAX,
2076 const char *output_name = NULL;
2077 const char *ev_qualifier_str = NULL;
/* Command line options; most of them store directly into 'trace'. */
2078 const struct option trace_options[] = {
2079 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2080 "show the thread COMM next to its id"),
2081 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2082 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2083 "list of events to trace"),
2084 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2085 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2086 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2087 "trace events on existing process id"),
2088 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2089 "trace events on existing thread id"),
2090 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2091 "system-wide collection from all CPUs"),
2092 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2093 "list of cpus to monitor"),
2094 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2095 "child tasks do not inherit counters"),
2096 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2097 "number of mmap data pages",
2098 perf_evlist__parse_mmap_pages),
2099 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2101 OPT_CALLBACK(0, "duration", &trace, "float",
2102 "show only events with duration > N.M ms",
2103 trace__set_duration),
2104 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2105 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2106 OPT_BOOLEAN('T', "time", &trace.full_time,
2107 "Show full timestamp, not time relative to first start"),
2108 OPT_BOOLEAN(0, "summary", &trace.summary,
2109 "Show syscall summary with statistics"),
/* The "record" subcommand gets the remaining arguments, minus "trace record". */
2115 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2116 return trace__record(argc-2, &argv[2]);
2118 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2120 if (output_name != NULL) {
2121 err = trace__open_output(&trace, output_name);
2123 perror("failed to create output file");
/* A leading '!' in the -e expression is recorded in not_ev_qualifier. */
2128 if (ev_qualifier_str != NULL) {
2129 const char *s = ev_qualifier_str;
2131 trace.not_ev_qualifier = *s == '!';
2132 if (trace.not_ev_qualifier)
2134 trace.ev_qualifier = strlist__new(true, s);
2135 if (trace.ev_qualifier == NULL) {
2136 fputs("Not enough memory to parse event qualifier",
2143 err = perf_target__validate(&trace.opts.target);
2145 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2146 fprintf(trace.output, "%s", bf);
2150 err = perf_target__parse_uid(&trace.opts.target);
2152 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2153 fprintf(trace.output, "%s", bf);
/* Neither a workload nor a target was given: trace system wide. */
2157 if (!argc && perf_target__none(&trace.opts.target))
2158 trace.opts.target.system_wide = true;
2161 err = trace__replay(&trace);
2163 err = trace__run(&trace, argc, argv);
/* Only close the stream when it was opened here for -o/--output. */
2166 if (output_name != NULL)
2167 fclose(trace.output);