Pileus Git - ~andy/linux/blob - tools/perf/builtin-trace.c
perf evlist: Close fds on destructor
[~andy/linux] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK              0x20000
26 #endif
27
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON          100
30 #endif
31
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE         12
34 #endif
35
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE       13
38 #endif
39
40 struct tp_field {
41         int offset;
42         union {
43                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
45         };
46 };
47
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
50 { \
51         return *(u##bits *)(sample->raw_data + field->offset); \
52 }
53
54 TP_UINT_FIELD(8);
55 TP_UINT_FIELD(16);
56 TP_UINT_FIELD(32);
57 TP_UINT_FIELD(64);
58
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
61 { \
62         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63         return bswap_##bits(value);\
64 }
65
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
69
70 static int tp_field__init_uint(struct tp_field *field,
71                                struct format_field *format_field,
72                                bool needs_swap)
73 {
74         field->offset = format_field->offset;
75
76         switch (format_field->size) {
77         case 1:
78                 field->integer = tp_field__u8;
79                 break;
80         case 2:
81                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82                 break;
83         case 4:
84                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85                 break;
86         case 8:
87                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
88                 break;
89         default:
90                 return -1;
91         }
92
93         return 0;
94 }
95
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 {
98         return sample->raw_data + field->offset;
99 }
100
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 {
103         field->offset = format_field->offset;
104         field->pointer = tp_field__ptr;
105         return 0;
106 }
107
108 struct syscall_tp {
109         struct tp_field id;
110         union {
111                 struct tp_field args, ret;
112         };
113 };
114
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116                                           struct tp_field *field,
117                                           const char *name)
118 {
119         struct format_field *format_field = perf_evsel__field(evsel, name);
120
121         if (format_field == NULL)
122                 return -1;
123
124         return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 }
126
/*
 * Initialise the struct syscall_tp member called 'name' (reached through
 * evsel->priv) as an unsigned integer reader for the tracepoint field of the
 * same name.  Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is parenthesised so that any pointer expression may be passed; note
 * that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
130
/*
 * Look up the tracepoint field called 'name' on 'evsel' and initialise 'field'
 * as a pointer reader for it.
 *
 * Returns -1 when the event has no such field, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
142
/*
 * Initialise the struct syscall_tp member called 'name' (reached through
 * evsel->priv) as a pointer reader for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is parenthesised so that any pointer expression may be passed; note
 * that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
146
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
148 {
149         zfree(&evsel->priv);
150         perf_evsel__delete(evsel);
151 }
152
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
154 {
155         evsel->priv = malloc(sizeof(struct syscall_tp));
156         if (evsel->priv != NULL) {
157                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
158                         goto out_delete;
159
160                 evsel->handler = handler;
161                 return 0;
162         }
163
164         return -ENOMEM;
165
166 out_delete:
167         zfree(&evsel->priv);
168         return -ENOENT;
169 }
170
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and prepare it with
 * perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created or
 * the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel;

        evsel = perf_evsel__newtp("raw_syscalls", direction);
        if (evsel == NULL) {
                /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
                evsel = perf_evsel__newtp("syscalls", direction);
        }

        if (evsel == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
190
/*
 * Read the integer tracepoint field 'name' from 'sample' using the reader
 * stored in the struct syscall_tp hanging off evsel->priv.
 *
 * 'evsel' is parenthesised so that any pointer expression may be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.integer(&fields->name, sample); })
194
/*
 * Obtain a pointer to the tracepoint field 'name' inside 'sample' using the
 * reader stored in the struct syscall_tp hanging off evsel->priv.
 *
 * 'evsel' is parenthesised so that any pointer expression may be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.pointer(&fields->name, sample); })
198
199 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
200                                           void *sys_enter_handler,
201                                           void *sys_exit_handler)
202 {
203         int ret = -1;
204         struct perf_evsel *sys_enter, *sys_exit;
205
206         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
207         if (sys_enter == NULL)
208                 goto out;
209
210         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
211                 goto out_delete_sys_enter;
212
213         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
214         if (sys_exit == NULL)
215                 goto out_delete_sys_enter;
216
217         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
218                 goto out_delete_sys_exit;
219
220         perf_evlist__add(evlist, sys_enter);
221         perf_evlist__add(evlist, sys_exit);
222
223         ret = 0;
224 out:
225         return ret;
226
227 out_delete_sys_exit:
228         perf_evsel__delete_priv(sys_exit);
229 out_delete_sys_enter:
230         perf_evsel__delete_priv(sys_enter);
231         goto out;
232 }
233
234
235 struct syscall_arg {
236         unsigned long val;
237         struct thread *thread;
238         struct trace  *trace;
239         void          *parm;
240         u8            idx;
241         u8            mask;
242 };
243
/*
 * Table mapping consecutive integer values to names: entries[i] is the name
 * of the value (offset + i), for 0 <= i < nr_entries.
 */
struct strarray {
        int          offset;
        int          nr_entries;
        const char **entries;
};
249
250 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
251         .nr_entries = ARRAY_SIZE(array), \
252         .entries = array, \
253 }
254
255 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
256         .offset     = off, \
257         .nr_entries = ARRAY_SIZE(array), \
258         .entries = array, \
259 }
260
261 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
262                                                 const char *intfmt,
263                                                 struct syscall_arg *arg)
264 {
265         struct strarray *sa = arg->parm;
266         int idx = arg->val - sa->offset;
267
268         if (idx < 0 || idx >= sa->nr_entries)
269                 return scnprintf(bf, size, intfmt, arg->val);
270
271         return scnprintf(bf, size, "%s", sa->entries[idx]);
272 }
273
/*
 * strarray formatter that prints values without a name in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *fallback_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
279
280 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
281
/*
 * strarray formatter that prints values without a name in hex ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *fallback_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
287
288 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
289
290 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
291                                         struct syscall_arg *arg);
292
293 #define SCA_FD syscall_arg__scnprintf_fd
294
295 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
296                                            struct syscall_arg *arg)
297 {
298         int fd = arg->val;
299
300         if (fd == AT_FDCWD)
301                 return scnprintf(bf, size, "CWD");
302
303         return syscall_arg__scnprintf_fd(bf, size, arg);
304 }
305
306 #define SCA_FDAT syscall_arg__scnprintf_fd_at
307
308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
309                                               struct syscall_arg *arg);
310
311 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
312
313 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
314                                          struct syscall_arg *arg)
315 {
316         return scnprintf(bf, size, "%#lx", arg->val);
317 }
318
319 #define SCA_HEX syscall_arg__scnprintf_hex
320
321 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
322                                                struct syscall_arg *arg)
323 {
324         int printed = 0, prot = arg->val;
325
326         if (prot == PROT_NONE)
327                 return scnprintf(bf, size, "NONE");
328 #define P_MMAP_PROT(n) \
329         if (prot & PROT_##n) { \
330                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
331                 prot &= ~PROT_##n; \
332         }
333
334         P_MMAP_PROT(EXEC);
335         P_MMAP_PROT(READ);
336         P_MMAP_PROT(WRITE);
337 #ifdef PROT_SEM
338         P_MMAP_PROT(SEM);
339 #endif
340         P_MMAP_PROT(GROWSDOWN);
341         P_MMAP_PROT(GROWSUP);
342 #undef P_MMAP_PROT
343
344         if (prot)
345                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
346
347         return printed;
348 }
349
350 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
351
352 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
353                                                 struct syscall_arg *arg)
354 {
355         int printed = 0, flags = arg->val;
356
357 #define P_MMAP_FLAG(n) \
358         if (flags & MAP_##n) { \
359                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
360                 flags &= ~MAP_##n; \
361         }
362
363         P_MMAP_FLAG(SHARED);
364         P_MMAP_FLAG(PRIVATE);
365 #ifdef MAP_32BIT
366         P_MMAP_FLAG(32BIT);
367 #endif
368         P_MMAP_FLAG(ANONYMOUS);
369         P_MMAP_FLAG(DENYWRITE);
370         P_MMAP_FLAG(EXECUTABLE);
371         P_MMAP_FLAG(FILE);
372         P_MMAP_FLAG(FIXED);
373         P_MMAP_FLAG(GROWSDOWN);
374 #ifdef MAP_HUGETLB
375         P_MMAP_FLAG(HUGETLB);
376 #endif
377         P_MMAP_FLAG(LOCKED);
378         P_MMAP_FLAG(NONBLOCK);
379         P_MMAP_FLAG(NORESERVE);
380         P_MMAP_FLAG(POPULATE);
381         P_MMAP_FLAG(STACK);
382 #ifdef MAP_UNINITIALIZED
383         P_MMAP_FLAG(UNINITIALIZED);
384 #endif
385 #undef P_MMAP_FLAG
386
387         if (flags)
388                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
389
390         return printed;
391 }
392
393 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
394
395 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
396                                                       struct syscall_arg *arg)
397 {
398         int behavior = arg->val;
399
400         switch (behavior) {
401 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
402         P_MADV_BHV(NORMAL);
403         P_MADV_BHV(RANDOM);
404         P_MADV_BHV(SEQUENTIAL);
405         P_MADV_BHV(WILLNEED);
406         P_MADV_BHV(DONTNEED);
407         P_MADV_BHV(REMOVE);
408         P_MADV_BHV(DONTFORK);
409         P_MADV_BHV(DOFORK);
410         P_MADV_BHV(HWPOISON);
411 #ifdef MADV_SOFT_OFFLINE
412         P_MADV_BHV(SOFT_OFFLINE);
413 #endif
414         P_MADV_BHV(MERGEABLE);
415         P_MADV_BHV(UNMERGEABLE);
416 #ifdef MADV_HUGEPAGE
417         P_MADV_BHV(HUGEPAGE);
418 #endif
419 #ifdef MADV_NOHUGEPAGE
420         P_MADV_BHV(NOHUGEPAGE);
421 #endif
422 #ifdef MADV_DONTDUMP
423         P_MADV_BHV(DONTDUMP);
424 #endif
425 #ifdef MADV_DODUMP
426         P_MADV_BHV(DODUMP);
427 #endif
428 #undef P_MADV_PHV
429         default: break;
430         }
431
432         return scnprintf(bf, size, "%#x", behavior);
433 }
434
435 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
436
437 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
438                                            struct syscall_arg *arg)
439 {
440         int printed = 0, op = arg->val;
441
442         if (op == 0)
443                 return scnprintf(bf, size, "NONE");
444 #define P_CMD(cmd) \
445         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
446                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
447                 op &= ~LOCK_##cmd; \
448         }
449
450         P_CMD(SH);
451         P_CMD(EX);
452         P_CMD(NB);
453         P_CMD(UN);
454         P_CMD(MAND);
455         P_CMD(RW);
456         P_CMD(READ);
457         P_CMD(WRITE);
458 #undef P_OP
459
460         if (op)
461                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
462
463         return printed;
464 }
465
466 #define SCA_FLOCK syscall_arg__scnprintf_flock
467
468 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
469 {
470         enum syscall_futex_args {
471                 SCF_UADDR   = (1 << 0),
472                 SCF_OP      = (1 << 1),
473                 SCF_VAL     = (1 << 2),
474                 SCF_TIMEOUT = (1 << 3),
475                 SCF_UADDR2  = (1 << 4),
476                 SCF_VAL3    = (1 << 5),
477         };
478         int op = arg->val;
479         int cmd = op & FUTEX_CMD_MASK;
480         size_t printed = 0;
481
482         switch (cmd) {
483 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
484         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
485         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
486         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
487         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
488         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
489         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
490         P_FUTEX_OP(WAKE_OP);                                                      break;
491         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
492         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
493         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
494         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
495         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
496         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
497         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
498         }
499
500         if (op & FUTEX_PRIVATE_FLAG)
501                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
502
503         if (op & FUTEX_CLOCK_REALTIME)
504                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
505
506         return printed;
507 }
508
509 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
510
511 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
512 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
513
514 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
515 static DEFINE_STRARRAY(itimers);
516
517 static const char *whences[] = { "SET", "CUR", "END",
518 #ifdef SEEK_DATA
519 "DATA",
520 #endif
521 #ifdef SEEK_HOLE
522 "HOLE",
523 #endif
524 };
525 static DEFINE_STRARRAY(whences);
526
527 static const char *fcntl_cmds[] = {
528         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
529         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
530         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
531         "F_GETOWNER_UIDS",
532 };
533 static DEFINE_STRARRAY(fcntl_cmds);
534
535 static const char *rlimit_resources[] = {
536         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
537         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
538         "RTTIME",
539 };
540 static DEFINE_STRARRAY(rlimit_resources);
541
542 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
543 static DEFINE_STRARRAY(sighow);
544
545 static const char *clockid[] = {
546         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
547         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
548 };
549 static DEFINE_STRARRAY(clockid);
550
/*
 * Address family names indexed by AF_* value (AF_UNSPEC == 0 ... AF_VSOCK ==
 * 40).  Every value needs an entry, otherwise all later names are shifted:
 * AF_MPLS (28) sits between AF_IB (27) and AF_CAN (29).
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
559 static DEFINE_STRARRAY(socket_families);
560
561 #ifndef SOCK_TYPE_MASK
562 #define SOCK_TYPE_MASK 0xf
563 #endif
564
565 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
566                                                       struct syscall_arg *arg)
567 {
568         size_t printed;
569         int type = arg->val,
570             flags = type & ~SOCK_TYPE_MASK;
571
572         type &= SOCK_TYPE_MASK;
573         /*
574          * Can't use a strarray, MIPS may override for ABI reasons.
575          */
576         switch (type) {
577 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
578         P_SK_TYPE(STREAM);
579         P_SK_TYPE(DGRAM);
580         P_SK_TYPE(RAW);
581         P_SK_TYPE(RDM);
582         P_SK_TYPE(SEQPACKET);
583         P_SK_TYPE(DCCP);
584         P_SK_TYPE(PACKET);
585 #undef P_SK_TYPE
586         default:
587                 printed = scnprintf(bf, size, "%#x", type);
588         }
589
590 #define P_SK_FLAG(n) \
591         if (flags & SOCK_##n) { \
592                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
593                 flags &= ~SOCK_##n; \
594         }
595
596         P_SK_FLAG(CLOEXEC);
597         P_SK_FLAG(NONBLOCK);
598 #undef P_SK_FLAG
599
600         if (flags)
601                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
602
603         return printed;
604 }
605
606 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
607
608 #ifndef MSG_PROBE
609 #define MSG_PROBE            0x10
610 #endif
611 #ifndef MSG_WAITFORONE
612 #define MSG_WAITFORONE  0x10000
613 #endif
614 #ifndef MSG_SENDPAGE_NOTLAST
615 #define MSG_SENDPAGE_NOTLAST 0x20000
616 #endif
617 #ifndef MSG_FASTOPEN
618 #define MSG_FASTOPEN         0x20000000
619 #endif
620
621 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
622                                                struct syscall_arg *arg)
623 {
624         int printed = 0, flags = arg->val;
625
626         if (flags == 0)
627                 return scnprintf(bf, size, "NONE");
628 #define P_MSG_FLAG(n) \
629         if (flags & MSG_##n) { \
630                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
631                 flags &= ~MSG_##n; \
632         }
633
634         P_MSG_FLAG(OOB);
635         P_MSG_FLAG(PEEK);
636         P_MSG_FLAG(DONTROUTE);
637         P_MSG_FLAG(TRYHARD);
638         P_MSG_FLAG(CTRUNC);
639         P_MSG_FLAG(PROBE);
640         P_MSG_FLAG(TRUNC);
641         P_MSG_FLAG(DONTWAIT);
642         P_MSG_FLAG(EOR);
643         P_MSG_FLAG(WAITALL);
644         P_MSG_FLAG(FIN);
645         P_MSG_FLAG(SYN);
646         P_MSG_FLAG(CONFIRM);
647         P_MSG_FLAG(RST);
648         P_MSG_FLAG(ERRQUEUE);
649         P_MSG_FLAG(NOSIGNAL);
650         P_MSG_FLAG(MORE);
651         P_MSG_FLAG(WAITFORONE);
652         P_MSG_FLAG(SENDPAGE_NOTLAST);
653         P_MSG_FLAG(FASTOPEN);
654         P_MSG_FLAG(CMSG_CLOEXEC);
655 #undef P_MSG_FLAG
656
657         if (flags)
658                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
659
660         return printed;
661 }
662
663 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
664
665 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
666                                                  struct syscall_arg *arg)
667 {
668         size_t printed = 0;
669         int mode = arg->val;
670
671         if (mode == F_OK) /* 0 */
672                 return scnprintf(bf, size, "F");
673 #define P_MODE(n) \
674         if (mode & n##_OK) { \
675                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
676                 mode &= ~n##_OK; \
677         }
678
679         P_MODE(R);
680         P_MODE(W);
681         P_MODE(X);
682 #undef P_MODE
683
684         if (mode)
685                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
686
687         return printed;
688 }
689
690 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
691
692 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
693                                                struct syscall_arg *arg)
694 {
695         int printed = 0, flags = arg->val;
696
697         if (!(flags & O_CREAT))
698                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
699
700         if (flags == 0)
701                 return scnprintf(bf, size, "RDONLY");
702 #define P_FLAG(n) \
703         if (flags & O_##n) { \
704                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
705                 flags &= ~O_##n; \
706         }
707
708         P_FLAG(APPEND);
709         P_FLAG(ASYNC);
710         P_FLAG(CLOEXEC);
711         P_FLAG(CREAT);
712         P_FLAG(DIRECT);
713         P_FLAG(DIRECTORY);
714         P_FLAG(EXCL);
715         P_FLAG(LARGEFILE);
716         P_FLAG(NOATIME);
717         P_FLAG(NOCTTY);
718 #ifdef O_NONBLOCK
719         P_FLAG(NONBLOCK);
720 #elif O_NDELAY
721         P_FLAG(NDELAY);
722 #endif
723 #ifdef O_PATH
724         P_FLAG(PATH);
725 #endif
726         P_FLAG(RDWR);
727 #ifdef O_DSYNC
728         if ((flags & O_SYNC) == O_SYNC)
729                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
730         else {
731                 P_FLAG(DSYNC);
732         }
733 #else
734         P_FLAG(SYNC);
735 #endif
736         P_FLAG(TRUNC);
737         P_FLAG(WRONLY);
738 #undef P_FLAG
739
740         if (flags)
741                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
742
743         return printed;
744 }
745
746 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
747
748 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
749                                                    struct syscall_arg *arg)
750 {
751         int printed = 0, flags = arg->val;
752
753         if (flags == 0)
754                 return scnprintf(bf, size, "NONE");
755 #define P_FLAG(n) \
756         if (flags & EFD_##n) { \
757                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
758                 flags &= ~EFD_##n; \
759         }
760
761         P_FLAG(SEMAPHORE);
762         P_FLAG(CLOEXEC);
763         P_FLAG(NONBLOCK);
764 #undef P_FLAG
765
766         if (flags)
767                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
768
769         return printed;
770 }
771
772 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
773
774 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
775                                                 struct syscall_arg *arg)
776 {
777         int printed = 0, flags = arg->val;
778
779 #define P_FLAG(n) \
780         if (flags & O_##n) { \
781                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
782                 flags &= ~O_##n; \
783         }
784
785         P_FLAG(CLOEXEC);
786         P_FLAG(NONBLOCK);
787 #undef P_FLAG
788
789         if (flags)
790                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
791
792         return printed;
793 }
794
795 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
796
797 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
798 {
799         int sig = arg->val;
800
801         switch (sig) {
802 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
803         P_SIGNUM(HUP);
804         P_SIGNUM(INT);
805         P_SIGNUM(QUIT);
806         P_SIGNUM(ILL);
807         P_SIGNUM(TRAP);
808         P_SIGNUM(ABRT);
809         P_SIGNUM(BUS);
810         P_SIGNUM(FPE);
811         P_SIGNUM(KILL);
812         P_SIGNUM(USR1);
813         P_SIGNUM(SEGV);
814         P_SIGNUM(USR2);
815         P_SIGNUM(PIPE);
816         P_SIGNUM(ALRM);
817         P_SIGNUM(TERM);
818         P_SIGNUM(STKFLT);
819         P_SIGNUM(CHLD);
820         P_SIGNUM(CONT);
821         P_SIGNUM(STOP);
822         P_SIGNUM(TSTP);
823         P_SIGNUM(TTIN);
824         P_SIGNUM(TTOU);
825         P_SIGNUM(URG);
826         P_SIGNUM(XCPU);
827         P_SIGNUM(XFSZ);
828         P_SIGNUM(VTALRM);
829         P_SIGNUM(PROF);
830         P_SIGNUM(WINCH);
831         P_SIGNUM(IO);
832         P_SIGNUM(PWR);
833         P_SIGNUM(SYS);
834         default: break;
835         }
836
837         return scnprintf(bf, size, "%#x", sig);
838 }
839
840 #define SCA_SIGNUM syscall_arg__scnprintf_signum
841
#define TCGETS          0x5401

/*
 * Names of the tty ioctl commands, indexed by (cmd - TCGETS) because the
 * table is looked up with an offset of 0x5401.  The designated entries that
 * restart the sequence after a gap are therefore written relative to TCGETS
 * as well.  Gaps in the numbering are left as NULL.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
        [0x5427 - TCGETS] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
        [0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT",
        [0x5460 - TCGETS] = "FIOQSIZE",
};
861
862 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
863
864 #define STRARRAY(arg, name, array) \
865           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
866           .arg_parm      = { [arg] = &strarray__##array, }
867
868 static struct syscall_fmt {
869         const char *name;
870         const char *alias;
871         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
872         void       *arg_parm[6];
873         bool       errmsg;
874         bool       timeout;
875         bool       hexret;
876 } syscall_fmts[] = {
877         { .name     = "access",     .errmsg = true,
878           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
879         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
880         { .name     = "brk",        .hexret = true,
881           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
882         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
883         { .name     = "close",      .errmsg = true,
884           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
885         { .name     = "connect",    .errmsg = true, },
886         { .name     = "dup",        .errmsg = true,
887           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
888         { .name     = "dup2",       .errmsg = true,
889           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
890         { .name     = "dup3",       .errmsg = true,
891           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
892         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
893         { .name     = "eventfd2",   .errmsg = true,
894           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
895         { .name     = "faccessat",  .errmsg = true,
896           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
897         { .name     = "fadvise64",  .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
899         { .name     = "fallocate",  .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "fchdir",     .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "fchmod",     .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchmodat",   .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
907         { .name     = "fchown",     .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
909         { .name     = "fchownat",   .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
911         { .name     = "fcntl",      .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */
913                              [1] = SCA_STRARRAY, /* cmd */ },
914           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
915         { .name     = "fdatasync",  .errmsg = true,
916           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
917         { .name     = "flock",      .errmsg = true,
918           .arg_scnprintf = { [0] = SCA_FD, /* fd */
919                              [1] = SCA_FLOCK, /* cmd */ }, },
920         { .name     = "fsetxattr",  .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
922         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
925           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
926         { .name     = "fstatfs",    .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "fsync",    .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
930         { .name     = "ftruncate", .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "futex",      .errmsg = true,
933           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
934         { .name     = "futimesat", .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
936         { .name     = "getdents",   .errmsg = true,
937           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
938         { .name     = "getdents64", .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
941         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
942         { .name     = "ioctl",      .errmsg = true,
943           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
944                              [1] = SCA_STRHEXARRAY, /* cmd */
945                              [2] = SCA_HEX, /* arg */ },
946           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
947         { .name     = "kill",       .errmsg = true,
948           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
949         { .name     = "linkat",     .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
951         { .name     = "lseek",      .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */
953                              [2] = SCA_STRARRAY, /* whence */ },
954           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
955         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
956         { .name     = "madvise",    .errmsg = true,
957           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
958                              [2] = SCA_MADV_BHV, /* behavior */ }, },
959         { .name     = "mkdirat",    .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
961         { .name     = "mknodat",    .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
963         { .name     = "mlock",      .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965         { .name     = "mlockall",   .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967         { .name     = "mmap",       .hexret = true,
968           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
969                              [2] = SCA_MMAP_PROT, /* prot */
970                              [3] = SCA_MMAP_FLAGS, /* flags */
971                              [4] = SCA_FD,        /* fd */ }, },
972         { .name     = "mprotect",   .errmsg = true,
973           .arg_scnprintf = { [0] = SCA_HEX, /* start */
974                              [2] = SCA_MMAP_PROT, /* prot */ }, },
975         { .name     = "mremap",     .hexret = true,
976           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
977                              [4] = SCA_HEX, /* new_addr */ }, },
978         { .name     = "munlock",    .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
980         { .name     = "munmap",     .errmsg = true,
981           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982         { .name     = "name_to_handle_at", .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
984         { .name     = "newfstatat", .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
986         { .name     = "open",       .errmsg = true,
987           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
988         { .name     = "open_by_handle_at", .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991         { .name     = "openat",     .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
993                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
994         { .name     = "pipe2",      .errmsg = true,
995           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
996         { .name     = "poll",       .errmsg = true, .timeout = true, },
997         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
998         { .name     = "pread",      .errmsg = true, .alias = "pread64",
999           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1000         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1001           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1002         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1003         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1004           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1005         { .name     = "pwritev",    .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1007         { .name     = "read",       .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "readlinkat", .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1011         { .name     = "readv",      .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1013         { .name     = "recvfrom",   .errmsg = true,
1014           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1015         { .name     = "recvmmsg",   .errmsg = true,
1016           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017         { .name     = "recvmsg",    .errmsg = true,
1018           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1019         { .name     = "renameat",   .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1021         { .name     = "rt_sigaction", .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1023         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1024         { .name     = "rt_sigqueueinfo", .errmsg = true,
1025           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1026         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1027           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1028         { .name     = "select",     .errmsg = true, .timeout = true, },
1029         { .name     = "sendmmsg",    .errmsg = true,
1030           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031         { .name     = "sendmsg",    .errmsg = true,
1032           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1033         { .name     = "sendto",     .errmsg = true,
1034           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1036         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037         { .name     = "shutdown",   .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1039         { .name     = "socket",     .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041                              [1] = SCA_SK_TYPE, /* type */ },
1042           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1043         { .name     = "socketpair", .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045                              [1] = SCA_SK_TYPE, /* type */ },
1046           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1047         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1048         { .name     = "symlinkat",  .errmsg = true,
1049           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1050         { .name     = "tgkill",     .errmsg = true,
1051           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1052         { .name     = "tkill",      .errmsg = true,
1053           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1054         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1055         { .name     = "unlinkat",   .errmsg = true,
1056           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057         { .name     = "utimensat",  .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1059         { .name     = "write",      .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1061         { .name     = "writev",     .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1063 };
1064
1065 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1066 {
1067         const struct syscall_fmt *fmt = fmtp;
1068         return strcmp(name, fmt->name);
1069 }
1070
1071 static struct syscall_fmt *syscall_fmt__find(const char *name)
1072 {
1073         const int nmemb = ARRAY_SIZE(syscall_fmts);
1074         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1075 }
1076
/* Per-syscall state, stored in trace->syscalls.table and indexed by syscall id. */
struct syscall {
        /* sys_enter_<name> tracepoint format; NULL when it could not be read */
        struct event_format *tp_format;
        /* syscall name as resolved by audit_syscall_to_name() */
        const char *name;
        /* true when the event qualifier excludes this syscall from the output */
        bool filtered;
        /* entry from syscall_fmts, or NULL when there is no special formatting */
        struct syscall_fmt *fmt;
        /* one optional argument pretty-printer per syscall argument */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument parameter handed to the pretty-printer (from fmt->arg_parm) */
        void **arg_parm;
};
1085
1086 static size_t fprintf_duration(unsigned long t, FILE *fp)
1087 {
1088         double duration = (double)t / NSEC_PER_MSEC;
1089         size_t printed = fprintf(fp, "(");
1090
1091         if (duration >= 1.0)
1092                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1093         else if (duration >= 0.01)
1094                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1095         else
1096                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1097         return printed + fprintf(fp, "): ");
1098 }
1099
1100 struct thread_trace {
1101         u64               entry_time;
1102         u64               exit_time;
1103         bool              entry_pending;
1104         unsigned long     nr_events;
1105         char              *entry_str;
1106         double            runtime_ms;
1107         struct {
1108                 int       max;
1109                 char      **table;
1110         } paths;
1111
1112         struct intlist *syscall_stats;
1113 };
1114
1115 static struct thread_trace *thread_trace__new(void)
1116 {
1117         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1118
1119         if (ttrace)
1120                 ttrace->paths.max = -1;
1121
1122         ttrace->syscall_stats = intlist__new(NULL);
1123
1124         return ttrace;
1125 }
1126
1127 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1128 {
1129         struct thread_trace *ttrace;
1130
1131         if (thread == NULL)
1132                 goto fail;
1133
1134         if (thread->priv == NULL)
1135                 thread->priv = thread_trace__new();
1136                 
1137         if (thread->priv == NULL)
1138                 goto fail;
1139
1140         ttrace = thread->priv;
1141         ++ttrace->nr_events;
1142
1143         return ttrace;
1144 fail:
1145         color_fprintf(fp, PERF_COLOR_RED,
1146                       "WARNING: not enough memory, dropping samples!\n");
1147         return NULL;
1148 }
1149
1150 struct trace {
1151         struct perf_tool        tool;
1152         struct {
1153                 int             machine;
1154                 int             open_id;
1155         }                       audit;
1156         struct {
1157                 int             max;
1158                 struct syscall  *table;
1159         } syscalls;
1160         struct record_opts      opts;
1161         struct machine          *host;
1162         u64                     base_time;
1163         bool                    full_time;
1164         FILE                    *output;
1165         unsigned long           nr_events;
1166         struct strlist          *ev_qualifier;
1167         bool                    not_ev_qualifier;
1168         bool                    live;
1169         const char              *last_vfs_getname;
1170         struct intlist          *tid_list;
1171         struct intlist          *pid_list;
1172         bool                    sched;
1173         bool                    multiple_threads;
1174         bool                    summary;
1175         bool                    summary_only;
1176         bool                    show_comm;
1177         bool                    show_tool_stats;
1178         double                  duration_filter;
1179         double                  runtime_ms;
1180         struct {
1181                 u64             vfs_getname, proc_getname;
1182         } stats;
1183 };
1184
1185 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1186 {
1187         struct thread_trace *ttrace = thread->priv;
1188
1189         if (fd > ttrace->paths.max) {
1190                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1191
1192                 if (npath == NULL)
1193                         return -1;
1194
1195                 if (ttrace->paths.max != -1) {
1196                         memset(npath + ttrace->paths.max + 1, 0,
1197                                (fd - ttrace->paths.max) * sizeof(char *));
1198                 } else {
1199                         memset(npath, 0, (fd + 1) * sizeof(char *));
1200                 }
1201
1202                 ttrace->paths.table = npath;
1203                 ttrace->paths.max   = fd;
1204         }
1205
1206         ttrace->paths.table[fd] = strdup(pathname);
1207
1208         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1209 }
1210
1211 static int thread__read_fd_path(struct thread *thread, int fd)
1212 {
1213         char linkname[PATH_MAX], pathname[PATH_MAX];
1214         struct stat st;
1215         int ret;
1216
1217         if (thread->pid_ == thread->tid) {
1218                 scnprintf(linkname, sizeof(linkname),
1219                           "/proc/%d/fd/%d", thread->pid_, fd);
1220         } else {
1221                 scnprintf(linkname, sizeof(linkname),
1222                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1223         }
1224
1225         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1226                 return -1;
1227
1228         ret = readlink(linkname, pathname, sizeof(pathname));
1229
1230         if (ret < 0 || ret > st.st_size)
1231                 return -1;
1232
1233         pathname[ret] = '\0';
1234         return trace__set_fd_pathname(thread, fd, pathname);
1235 }
1236
1237 static const char *thread__fd_path(struct thread *thread, int fd,
1238                                    struct trace *trace)
1239 {
1240         struct thread_trace *ttrace = thread->priv;
1241
1242         if (ttrace == NULL)
1243                 return NULL;
1244
1245         if (fd < 0)
1246                 return NULL;
1247
1248         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1249                 if (!trace->live)
1250                         return NULL;
1251                 ++trace->stats.proc_getname;
1252                 if (thread__read_fd_path(thread, fd)) {
1253                         return NULL;
1254         }
1255
1256         return ttrace->paths.table[fd];
1257 }
1258
1259 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1260                                         struct syscall_arg *arg)
1261 {
1262         int fd = arg->val;
1263         size_t printed = scnprintf(bf, size, "%d", fd);
1264         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1265
1266         if (path)
1267                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1268
1269         return printed;
1270 }
1271
1272 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1273                                               struct syscall_arg *arg)
1274 {
1275         int fd = arg->val;
1276         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1277         struct thread_trace *ttrace = arg->thread->priv;
1278
1279         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1280                 zfree(&ttrace->paths.table[fd]);
1281
1282         return printed;
1283 }
1284
1285 static bool trace__filter_duration(struct trace *trace, double t)
1286 {
1287         return t < (trace->duration_filter * NSEC_PER_MSEC);
1288 }
1289
1290 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1291 {
1292         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1293
1294         return fprintf(fp, "%10.3f ", ts);
1295 }
1296
/* Set from sig_handler(): a signal asked the tool to stop. */
static bool done;
/* Set from sig_handler(): the stop request was a SIGINT. */
static bool interrupted;

/* Signal handler: request termination and record whether it was SIGINT. */
static void sig_handler(int sig)
{
        interrupted = (sig == SIGINT);
        done = true;
}
1305
1306 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1307                                         u64 duration, u64 tstamp, FILE *fp)
1308 {
1309         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1310         printed += fprintf_duration(duration, fp);
1311
1312         if (trace->multiple_threads) {
1313                 if (trace->show_comm)
1314                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1315                 printed += fprintf(fp, "%d ", thread->tid);
1316         }
1317
1318         return printed;
1319 }
1320
1321 static int trace__process_event(struct trace *trace, struct machine *machine,
1322                                 union perf_event *event, struct perf_sample *sample)
1323 {
1324         int ret = 0;
1325
1326         switch (event->header.type) {
1327         case PERF_RECORD_LOST:
1328                 color_fprintf(trace->output, PERF_COLOR_RED,
1329                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1330                 ret = machine__process_lost_event(machine, event, sample);
1331         default:
1332                 ret = machine__process_event(machine, event, sample);
1333                 break;
1334         }
1335
1336         return ret;
1337 }
1338
1339 static int trace__tool_process(struct perf_tool *tool,
1340                                union perf_event *event,
1341                                struct perf_sample *sample,
1342                                struct machine *machine)
1343 {
1344         struct trace *trace = container_of(tool, struct trace, tool);
1345         return trace__process_event(trace, machine, event, sample);
1346 }
1347
1348 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1349 {
1350         int err = symbol__init();
1351
1352         if (err)
1353                 return err;
1354
1355         trace->host = machine__new_host();
1356         if (trace->host == NULL)
1357                 return -ENOMEM;
1358
1359         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1360                                             evlist->threads, trace__tool_process, false);
1361         if (err)
1362                 symbol__exit();
1363
1364         return err;
1365 }
1366
1367 static int syscall__set_arg_fmts(struct syscall *sc)
1368 {
1369         struct format_field *field;
1370         int idx = 0;
1371
1372         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1373         if (sc->arg_scnprintf == NULL)
1374                 return -1;
1375
1376         if (sc->fmt)
1377                 sc->arg_parm = sc->fmt->arg_parm;
1378
1379         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1380                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1381                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1382                 else if (field->flags & FIELD_IS_POINTER)
1383                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1384                 ++idx;
1385         }
1386
1387         return 0;
1388 }
1389
1390 static int trace__read_syscall_info(struct trace *trace, int id)
1391 {
1392         char tp_name[128];
1393         struct syscall *sc;
1394         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1395
1396         if (name == NULL)
1397                 return -1;
1398
1399         if (id > trace->syscalls.max) {
1400                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1401
1402                 if (nsyscalls == NULL)
1403                         return -1;
1404
1405                 if (trace->syscalls.max != -1) {
1406                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1407                                (id - trace->syscalls.max) * sizeof(*sc));
1408                 } else {
1409                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1410                 }
1411
1412                 trace->syscalls.table = nsyscalls;
1413                 trace->syscalls.max   = id;
1414         }
1415
1416         sc = trace->syscalls.table + id;
1417         sc->name = name;
1418
1419         if (trace->ev_qualifier) {
1420                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1421
1422                 if (!(in ^ trace->not_ev_qualifier)) {
1423                         sc->filtered = true;
1424                         /*
1425                          * No need to do read tracepoint information since this will be
1426                          * filtered out.
1427                          */
1428                         return 0;
1429                 }
1430         }
1431
1432         sc->fmt  = syscall_fmt__find(sc->name);
1433
1434         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1435         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1436
1437         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1438                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1439                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1440         }
1441
1442         if (sc->tp_format == NULL)
1443                 return -1;
1444
1445         return syscall__set_arg_fmts(sc);
1446 }
1447
1448 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1449                                       unsigned long *args, struct trace *trace,
1450                                       struct thread *thread)
1451 {
1452         size_t printed = 0;
1453
1454         if (sc->tp_format != NULL) {
1455                 struct format_field *field;
1456                 u8 bit = 1;
1457                 struct syscall_arg arg = {
1458                         .idx    = 0,
1459                         .mask   = 0,
1460                         .trace  = trace,
1461                         .thread = thread,
1462                 };
1463
1464                 for (field = sc->tp_format->format.fields->next; field;
1465                      field = field->next, ++arg.idx, bit <<= 1) {
1466                         if (arg.mask & bit)
1467                                 continue;
1468                         /*
1469                          * Suppress this argument if its value is zero and
1470                          * and we don't have a string associated in an
1471                          * strarray for it.
1472                          */
1473                         if (args[arg.idx] == 0 &&
1474                             !(sc->arg_scnprintf &&
1475                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1476                               sc->arg_parm[arg.idx]))
1477                                 continue;
1478
1479                         printed += scnprintf(bf + printed, size - printed,
1480                                              "%s%s: ", printed ? ", " : "", field->name);
1481                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1482                                 arg.val = args[arg.idx];
1483                                 if (sc->arg_parm)
1484                                         arg.parm = sc->arg_parm[arg.idx];
1485                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1486                                                                       size - printed, &arg);
1487                         } else {
1488                                 printed += scnprintf(bf + printed, size - printed,
1489                                                      "%ld", args[arg.idx]);
1490                         }
1491                 }
1492         } else {
1493                 int i = 0;
1494
1495                 while (i < 6) {
1496                         printed += scnprintf(bf + printed, size - printed,
1497                                              "%sarg%d: %ld",
1498                                              printed ? ", " : "", i, args[i]);
1499                         ++i;
1500                 }
1501         }
1502
1503         return printed;
1504 }
1505
/* Signature shared by the per-tracepoint sample handlers such as trace__sys_enter(). */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1508
1509 static struct syscall *trace__syscall_info(struct trace *trace,
1510                                            struct perf_evsel *evsel, int id)
1511 {
1512
1513         if (id < 0) {
1514
1515                 /*
1516                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1517                  * before that, leaving at a higher verbosity level till that is
1518                  * explained. Reproduced with plain ftrace with:
1519                  *
1520                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1521                  * grep "NR -1 " /t/trace_pipe
1522                  *
1523                  * After generating some load on the machine.
1524                  */
1525                 if (verbose > 1) {
1526                         static u64 n;
1527                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1528                                 id, perf_evsel__name(evsel), ++n);
1529                 }
1530                 return NULL;
1531         }
1532
1533         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1534             trace__read_syscall_info(trace, id))
1535                 goto out_cant_read;
1536
1537         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1538                 goto out_cant_read;
1539
1540         return &trace->syscalls.table[id];
1541
1542 out_cant_read:
1543         if (verbose) {
1544                 fprintf(trace->output, "Problems reading syscall %d", id);
1545                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1546                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1547                 fputs(" information\n", trace->output);
1548         }
1549         return NULL;
1550 }
1551
1552 static void thread__update_stats(struct thread_trace *ttrace,
1553                                  int id, struct perf_sample *sample)
1554 {
1555         struct int_node *inode;
1556         struct stats *stats;
1557         u64 duration = 0;
1558
1559         inode = intlist__findnew(ttrace->syscall_stats, id);
1560         if (inode == NULL)
1561                 return;
1562
1563         stats = inode->priv;
1564         if (stats == NULL) {
1565                 stats = malloc(sizeof(struct stats));
1566                 if (stats == NULL)
1567                         return;
1568                 init_stats(stats);
1569                 inode->priv = stats;
1570         }
1571
1572         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1573                 duration = sample->time - ttrace->entry_time;
1574
1575         update_stats(stats, duration);
1576 }
1577
1578 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1579                             struct perf_sample *sample)
1580 {
1581         char *msg;
1582         void *args;
1583         size_t printed = 0;
1584         struct thread *thread;
1585         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1586         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1587         struct thread_trace *ttrace;
1588
1589         if (sc == NULL)
1590                 return -1;
1591
1592         if (sc->filtered)
1593                 return 0;
1594
1595         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1596         ttrace = thread__trace(thread, trace->output);
1597         if (ttrace == NULL)
1598                 return -1;
1599
1600         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1601         ttrace = thread->priv;
1602
1603         if (ttrace->entry_str == NULL) {
1604                 ttrace->entry_str = malloc(1024);
1605                 if (!ttrace->entry_str)
1606                         return -1;
1607         }
1608
1609         ttrace->entry_time = sample->time;
1610         msg = ttrace->entry_str;
1611         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1612
1613         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1614                                            args, trace, thread);
1615
1616         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1617                 if (!trace->duration_filter && !trace->summary_only) {
1618                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1619                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1620                 }
1621         } else
1622                 ttrace->entry_pending = true;
1623
1624         return 0;
1625 }
1626
1627 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1628                            struct perf_sample *sample)
1629 {
1630         int ret;
1631         u64 duration = 0;
1632         struct thread *thread;
1633         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1634         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1635         struct thread_trace *ttrace;
1636
1637         if (sc == NULL)
1638                 return -1;
1639
1640         if (sc->filtered)
1641                 return 0;
1642
1643         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1644         ttrace = thread__trace(thread, trace->output);
1645         if (ttrace == NULL)
1646                 return -1;
1647
1648         if (trace->summary)
1649                 thread__update_stats(ttrace, id, sample);
1650
1651         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1652
1653         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1654                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1655                 trace->last_vfs_getname = NULL;
1656                 ++trace->stats.vfs_getname;
1657         }
1658
1659         ttrace = thread->priv;
1660
1661         ttrace->exit_time = sample->time;
1662
1663         if (ttrace->entry_time) {
1664                 duration = sample->time - ttrace->entry_time;
1665                 if (trace__filter_duration(trace, duration))
1666                         goto out;
1667         } else if (trace->duration_filter)
1668                 goto out;
1669
1670         if (trace->summary_only)
1671                 goto out;
1672
1673         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1674
1675         if (ttrace->entry_pending) {
1676                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1677         } else {
1678                 fprintf(trace->output, " ... [");
1679                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1680                 fprintf(trace->output, "]: %s()", sc->name);
1681         }
1682
1683         if (sc->fmt == NULL) {
1684 signed_print:
1685                 fprintf(trace->output, ") = %d", ret);
1686         } else if (ret < 0 && sc->fmt->errmsg) {
1687                 char bf[256];
1688                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1689                            *e = audit_errno_to_name(-ret);
1690
1691                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1692         } else if (ret == 0 && sc->fmt->timeout)
1693                 fprintf(trace->output, ") = 0 Timeout");
1694         else if (sc->fmt->hexret)
1695                 fprintf(trace->output, ") = %#x", ret);
1696         else
1697                 goto signed_print;
1698
1699         fputc('\n', trace->output);
1700 out:
1701         ttrace->entry_pending = false;
1702
1703         return 0;
1704 }
1705
1706 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1707                               struct perf_sample *sample)
1708 {
1709         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1710         return 0;
1711 }
1712
1713 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1714                                      struct perf_sample *sample)
1715 {
1716         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1717         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1718         struct thread *thread = machine__findnew_thread(trace->host,
1719                                                         sample->pid,
1720                                                         sample->tid);
1721         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1722
1723         if (ttrace == NULL)
1724                 goto out_dump;
1725
1726         ttrace->runtime_ms += runtime_ms;
1727         trace->runtime_ms += runtime_ms;
1728         return 0;
1729
1730 out_dump:
1731         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1732                evsel->name,
1733                perf_evsel__strval(evsel, sample, "comm"),
1734                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1735                runtime,
1736                perf_evsel__intval(evsel, sample, "vruntime"));
1737         return 0;
1738 }
1739
1740 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1741 {
1742         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1743             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1744                 return false;
1745
1746         if (trace->pid_list || trace->tid_list)
1747                 return true;
1748
1749         return false;
1750 }
1751
1752 static int trace__process_sample(struct perf_tool *tool,
1753                                  union perf_event *event __maybe_unused,
1754                                  struct perf_sample *sample,
1755                                  struct perf_evsel *evsel,
1756                                  struct machine *machine __maybe_unused)
1757 {
1758         struct trace *trace = container_of(tool, struct trace, tool);
1759         int err = 0;
1760
1761         tracepoint_handler handler = evsel->handler;
1762
1763         if (skip_sample(trace, sample))
1764                 return 0;
1765
1766         if (!trace->full_time && trace->base_time == 0)
1767                 trace->base_time = sample->time;
1768
1769         if (handler) {
1770                 ++trace->nr_events;
1771                 handler(trace, evsel, sample);
1772         }
1773
1774         return err;
1775 }
1776
1777 static int parse_target_str(struct trace *trace)
1778 {
1779         if (trace->opts.target.pid) {
1780                 trace->pid_list = intlist__new(trace->opts.target.pid);
1781                 if (trace->pid_list == NULL) {
1782                         pr_err("Error parsing process id string\n");
1783                         return -EINVAL;
1784                 }
1785         }
1786
1787         if (trace->opts.target.tid) {
1788                 trace->tid_list = intlist__new(trace->opts.target.tid);
1789                 if (trace->tid_list == NULL) {
1790                         pr_err("Error parsing thread id string\n");
1791                         return -EINVAL;
1792                 }
1793         }
1794
1795         return 0;
1796 }
1797
/*
 * Implement "perf trace record": build an argument vector made of the fixed
 * record options below, the syscall tracepoint event string and the caller's
 * remaining arguments, then run cmd_record() on it.
 *
 * Returns -ENOMEM when the argument vector can't be allocated, -1 when no
 * syscall tracepoints are available, otherwise whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
        unsigned int rec_argc, i, j;
        const char **rec_argv;
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e",
        };

        /* +1 is for the event string below */
        rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = record_args[i];

        /* event string may be different for older kernels - e.g., RHEL6 */
        if (is_valid_tracepoint("raw_syscalls:sys_enter"))
                rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
        else if (is_valid_tracepoint("syscalls:sys_enter"))
                rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
        else {
                pr_err("Neither raw_syscalls nor syscalls events exist.\n");
                /* Don't leak the argument vector on this early exit. */
                free(rec_argv);
                return -1;
        }
        i++;

        for (j = 0; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        return cmd_record(i, rec_argv, NULL);
}
1836
1837 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1838
1839 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1840 {
1841         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1842         if (evsel == NULL)
1843                 return;
1844
1845         if (perf_evsel__field(evsel, "pathname") == NULL) {
1846                 perf_evsel__delete(evsel);
1847                 return;
1848         }
1849
1850         evsel->handler = trace__vfs_getname;
1851         perf_evlist__add(evlist, evsel);
1852 }
1853
1854 static int trace__run(struct trace *trace, int argc, const char **argv)
1855 {
1856         struct perf_evlist *evlist = perf_evlist__new();
1857         struct perf_evsel *evsel;
1858         int err = -1, i;
1859         unsigned long before;
1860         const bool forks = argc > 0;
1861
1862         trace->live = true;
1863
1864         if (evlist == NULL) {
1865                 fprintf(trace->output, "Not enough memory to run!\n");
1866                 goto out;
1867         }
1868
1869         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1870                 goto out_error_tp;
1871
1872         perf_evlist__add_vfs_getname(evlist);
1873
1874         if (trace->sched &&
1875                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1876                                 trace__sched_stat_runtime))
1877                 goto out_error_tp;
1878
1879         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1880         if (err < 0) {
1881                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1882                 goto out_delete_evlist;
1883         }
1884
1885         err = trace__symbols_init(trace, evlist);
1886         if (err < 0) {
1887                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1888                 goto out_delete_evlist;
1889         }
1890
1891         perf_evlist__config(evlist, &trace->opts);
1892
1893         signal(SIGCHLD, sig_handler);
1894         signal(SIGINT, sig_handler);
1895
1896         if (forks) {
1897                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1898                                                     argv, false, NULL);
1899                 if (err < 0) {
1900                         fprintf(trace->output, "Couldn't run the workload!\n");
1901                         goto out_delete_evlist;
1902                 }
1903         }
1904
1905         err = perf_evlist__open(evlist);
1906         if (err < 0)
1907                 goto out_error_open;
1908
1909         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1910         if (err < 0) {
1911                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1912                 goto out_delete_evlist;
1913         }
1914
1915         perf_evlist__enable(evlist);
1916
1917         if (forks)
1918                 perf_evlist__start_workload(evlist);
1919
1920         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1921 again:
1922         before = trace->nr_events;
1923
1924         for (i = 0; i < evlist->nr_mmaps; i++) {
1925                 union perf_event *event;
1926
1927                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1928                         const u32 type = event->header.type;
1929                         tracepoint_handler handler;
1930                         struct perf_sample sample;
1931
1932                         ++trace->nr_events;
1933
1934                         err = perf_evlist__parse_sample(evlist, event, &sample);
1935                         if (err) {
1936                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1937                                 goto next_event;
1938                         }
1939
1940                         if (!trace->full_time && trace->base_time == 0)
1941                                 trace->base_time = sample.time;
1942
1943                         if (type != PERF_RECORD_SAMPLE) {
1944                                 trace__process_event(trace, trace->host, event, &sample);
1945                                 continue;
1946                         }
1947
1948                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1949                         if (evsel == NULL) {
1950                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1951                                 goto next_event;
1952                         }
1953
1954                         if (sample.raw_data == NULL) {
1955                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1956                                        perf_evsel__name(evsel), sample.tid,
1957                                        sample.cpu, sample.raw_size);
1958                                 goto next_event;
1959                         }
1960
1961                         handler = evsel->handler;
1962                         handler(trace, evsel, &sample);
1963 next_event:
1964                         perf_evlist__mmap_consume(evlist, i);
1965
1966                         if (interrupted)
1967                                 goto out_disable;
1968                 }
1969         }
1970
1971         if (trace->nr_events == before) {
1972                 int timeout = done ? 100 : -1;
1973
1974                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1975                         goto again;
1976         } else {
1977                 goto again;
1978         }
1979
1980 out_disable:
1981         perf_evlist__disable(evlist);
1982
1983         if (!err) {
1984                 if (trace->summary)
1985                         trace__fprintf_thread_summary(trace, trace->output);
1986
1987                 if (trace->show_tool_stats) {
1988                         fprintf(trace->output, "Stats:\n "
1989                                                " vfs_getname : %" PRIu64 "\n"
1990                                                " proc_getname: %" PRIu64 "\n",
1991                                 trace->stats.vfs_getname,
1992                                 trace->stats.proc_getname);
1993                 }
1994         }
1995
1996         perf_evlist__munmap(evlist);
1997 out_delete_evlist:
1998         perf_evlist__delete(evlist);
1999 out:
2000         trace->live = false;
2001         return err;
2002 {
2003         char errbuf[BUFSIZ];
2004
2005 out_error_tp:
2006         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2007         goto out_error;
2008
2009 out_error_open:
2010         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2011
2012 out_error:
2013         fprintf(trace->output, "%s\n", errbuf);
2014         goto out_delete_evlist;
2015 }
2016 }
2017
2018 static int trace__replay(struct trace *trace)
2019 {
2020         const struct perf_evsel_str_handler handlers[] = {
2021                 { "probe:vfs_getname",       trace__vfs_getname, },
2022         };
2023         struct perf_data_file file = {
2024                 .path  = input_name,
2025                 .mode  = PERF_DATA_MODE_READ,
2026         };
2027         struct perf_session *session;
2028         struct perf_evsel *evsel;
2029         int err = -1;
2030
2031         trace->tool.sample        = trace__process_sample;
2032         trace->tool.mmap          = perf_event__process_mmap;
2033         trace->tool.mmap2         = perf_event__process_mmap2;
2034         trace->tool.comm          = perf_event__process_comm;
2035         trace->tool.exit          = perf_event__process_exit;
2036         trace->tool.fork          = perf_event__process_fork;
2037         trace->tool.attr          = perf_event__process_attr;
2038         trace->tool.tracing_data = perf_event__process_tracing_data;
2039         trace->tool.build_id      = perf_event__process_build_id;
2040
2041         trace->tool.ordered_samples = true;
2042         trace->tool.ordering_requires_timestamps = true;
2043
2044         /* add tid to output */
2045         trace->multiple_threads = true;
2046
2047         if (symbol__init() < 0)
2048                 return -1;
2049
2050         session = perf_session__new(&file, false, &trace->tool);
2051         if (session == NULL)
2052                 return -ENOMEM;
2053
2054         trace->host = &session->machines.host;
2055
2056         err = perf_session__set_tracepoints_handlers(session, handlers);
2057         if (err)
2058                 goto out;
2059
2060         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2061                                                      "raw_syscalls:sys_enter");
2062         /* older kernels have syscalls tp versus raw_syscalls */
2063         if (evsel == NULL)
2064                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065                                                              "syscalls:sys_enter");
2066         if (evsel == NULL) {
2067                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2068                 goto out;
2069         }
2070
2071         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2072             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2073                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2074                 goto out;
2075         }
2076
2077         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2078                                                      "raw_syscalls:sys_exit");
2079         if (evsel == NULL)
2080                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2081                                                              "syscalls:sys_exit");
2082         if (evsel == NULL) {
2083                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2084                 goto out;
2085         }
2086
2087         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2088             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2089                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2090                 goto out;
2091         }
2092
2093         err = parse_target_str(trace);
2094         if (err != 0)
2095                 goto out;
2096
2097         setup_pager();
2098
2099         err = perf_session__process_events(session, &trace->tool);
2100         if (err)
2101                 pr_err("Failed to process events, error %d", err);
2102
2103         else if (trace->summary)
2104                 trace__fprintf_thread_summary(trace, trace->output);
2105
2106 out:
2107         perf_session__delete(session);
2108
2109         return err;
2110 }
2111
/* Print the heading of the per-thread summary; returns what fprintf() reported. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2120
2121 static size_t thread__dump_stats(struct thread_trace *ttrace,
2122                                  struct trace *trace, FILE *fp)
2123 {
2124         struct stats *stats;
2125         size_t printed = 0;
2126         struct syscall *sc;
2127         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2128
2129         if (inode == NULL)
2130                 return 0;
2131
2132         printed += fprintf(fp, "\n");
2133
2134         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2135         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2136         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2137
2138         /* each int_node is a syscall */
2139         while (inode) {
2140                 stats = inode->priv;
2141                 if (stats) {
2142                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2143                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2144                         double avg = avg_stats(stats);
2145                         double pct;
2146                         u64 n = (u64) stats->n;
2147
2148                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2149                         avg /= NSEC_PER_MSEC;
2150
2151                         sc = &trace->syscalls.table[inode->i];
2152                         printed += fprintf(fp, "   %-15s", sc->name);
2153                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2154                                            n, min, avg);
2155                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2156                 }
2157
2158                 inode = intlist__next(inode);
2159         }
2160
2161         printed += fprintf(fp, "\n\n");
2162
2163         return printed;
2164 }
2165
/* Context handed to the per-thread summary callback through its priv pointer. */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace session being summarized */
        size_t printed;         /* running total of the fprintf() results */
};
2172
2173 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2174 {
2175         struct summary_data *data = priv;
2176         FILE *fp = data->fp;
2177         size_t printed = data->printed;
2178         struct trace *trace = data->trace;
2179         struct thread_trace *ttrace = thread->priv;
2180         double ratio;
2181
2182         if (ttrace == NULL)
2183                 return 0;
2184
2185         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2186
2187         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2188         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2189         printed += fprintf(fp, "%.1f%%", ratio);
2190         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2191         printed += thread__dump_stats(ttrace, trace, fp);
2192
2193         data->printed += printed;
2194
2195         return 0;
2196 }
2197
2198 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2199 {
2200         struct summary_data data = {
2201                 .fp = fp,
2202                 .trace = trace
2203         };
2204         data.printed = trace__fprintf_threads_header(fp);
2205
2206         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2207
2208         return data.printed;
2209 }
2210
2211 static int trace__set_duration(const struct option *opt, const char *str,
2212                                int unset __maybe_unused)
2213 {
2214         struct trace *trace = opt->value;
2215
2216         trace->duration_filter = atof(str);
2217         return 0;
2218 }
2219
2220 static int trace__open_output(struct trace *trace, const char *filename)
2221 {
2222         struct stat st;
2223
2224         if (!stat(filename, &st) && st.st_size) {
2225                 char oldname[PATH_MAX];
2226
2227                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2228                 unlink(oldname);
2229                 rename(filename, oldname);
2230         }
2231
2232         trace->output = fopen(filename, "w");
2233
2234         return trace->output == NULL ? -errno : 0;
2235 }
2236
2237 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2238 {
2239         const char * const trace_usage[] = {
2240                 "perf trace [<options>] [<command>]",
2241                 "perf trace [<options>] -- <command> [<options>]",
2242                 "perf trace record [<options>] [<command>]",
2243                 "perf trace record [<options>] -- <command> [<options>]",
2244                 NULL
2245         };
2246         struct trace trace = {
2247                 .audit = {
2248                         .machine = audit_detect_machine(),
2249                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2250                 },
2251                 .syscalls = {
2252                         . max = -1,
2253                 },
2254                 .opts = {
2255                         .target = {
2256                                 .uid       = UINT_MAX,
2257                                 .uses_mmap = true,
2258                         },
2259                         .user_freq     = UINT_MAX,
2260                         .user_interval = ULLONG_MAX,
2261                         .no_delay      = true,
2262                         .mmap_pages    = 1024,
2263                 },
2264                 .output = stdout,
2265                 .show_comm = true,
2266         };
2267         const char *output_name = NULL;
2268         const char *ev_qualifier_str = NULL;
2269         const struct option trace_options[] = {
2270         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2271                     "show the thread COMM next to its id"),
2272         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2273         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2274                     "list of events to trace"),
2275         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2276         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2277         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2278                     "trace events on existing process id"),
2279         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2280                     "trace events on existing thread id"),
2281         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2282                     "system-wide collection from all CPUs"),
2283         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2284                     "list of cpus to monitor"),
2285         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2286                     "child tasks do not inherit counters"),
2287         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2288                      "number of mmap data pages",
2289                      perf_evlist__parse_mmap_pages),
2290         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2291                    "user to profile"),
2292         OPT_CALLBACK(0, "duration", &trace, "float",
2293                      "show only events with duration > N.M ms",
2294                      trace__set_duration),
2295         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2296         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2297         OPT_BOOLEAN('T', "time", &trace.full_time,
2298                     "Show full timestamp, not time relative to first start"),
2299         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2300                     "Show only syscall summary with statistics"),
2301         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2302                     "Show all syscalls and summary with statistics"),
2303         OPT_END()
2304         };
2305         int err;
2306         char bf[BUFSIZ];
2307
2308         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2309                 return trace__record(argc-2, &argv[2]);
2310
2311         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2312
2313         /* summary_only implies summary option, but don't overwrite summary if set */
2314         if (trace.summary_only)
2315                 trace.summary = trace.summary_only;
2316
2317         if (output_name != NULL) {
2318                 err = trace__open_output(&trace, output_name);
2319                 if (err < 0) {
2320                         perror("failed to create output file");
2321                         goto out;
2322                 }
2323         }
2324
2325         if (ev_qualifier_str != NULL) {
2326                 const char *s = ev_qualifier_str;
2327
2328                 trace.not_ev_qualifier = *s == '!';
2329                 if (trace.not_ev_qualifier)
2330                         ++s;
2331                 trace.ev_qualifier = strlist__new(true, s);
2332                 if (trace.ev_qualifier == NULL) {
2333                         fputs("Not enough memory to parse event qualifier",
2334                               trace.output);
2335                         err = -ENOMEM;
2336                         goto out_close;
2337                 }
2338         }
2339
2340         err = target__validate(&trace.opts.target);
2341         if (err) {
2342                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2343                 fprintf(trace.output, "%s", bf);
2344                 goto out_close;
2345         }
2346
2347         err = target__parse_uid(&trace.opts.target);
2348         if (err) {
2349                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2350                 fprintf(trace.output, "%s", bf);
2351                 goto out_close;
2352         }
2353
2354         if (!argc && target__none(&trace.opts.target))
2355                 trace.opts.target.system_wide = true;
2356
2357         if (input_name)
2358                 err = trace__replay(&trace);
2359         else
2360                 err = trace__run(&trace, argc, argv);
2361
2362 out_close:
2363         if (output_name != NULL)
2364                 fclose(trace.output);
2365 out:
2366         return err;
2367 }