]> Pileus Git - ~andy/linux/blob - tools/perf/builtin-trace.c
Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[~andy/linux] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * For older distros: fallback values for constants that the system headers
 * may not provide.  Every definition is guarded, so a value coming from the
 * headers always wins.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE          1
#endif
43
/*
 * Accessor for one field of a tracepoint's raw sample payload.
 *
 * @offset is the byte position of the field inside perf_sample->raw_data.
 * The anonymous union holds the reader installed for the field: ->integer
 * returns its value widened to u64, ->pointer returns its address inside the
 * raw data.  Both share storage, so only the one that was installed may be
 * called.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
/*
 * Per-evsel accessors for the syscall tracepoints, stored in evsel->priv.
 *
 * @id is initialized for both directions; @args and @ret overlay each other
 * because the sys_enter evsel only sets up "args" and the sys_exit evsel
 * only sets up "ret".
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
/*
 * Initialize the integer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 * evsel->priv must already point to a struct syscall_tp.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
146
/*
 * Initialize the pointer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 * evsel->priv must already point to a struct syscall_tp.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
150
/*
 * Release the private data attached to @evsel and then the evsel itself.
 * ->priv has to be freed first, since it is reached through the evsel.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first; older kernels (e.g., RHEL6) use
 * syscalls:{enter,exit}, so "syscalls" is the fallback.
 *
 * Returns the new evsel, or NULL when it could not be created or set up.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

	if (tp == NULL)
		tp = perf_evsel__newtp("syscalls", direction);

	if (tp == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(tp, handler) != 0) {
		perf_evsel__delete_priv(tp);
		return NULL;
	}

	return tp;
}
194
/*
 * Read member @name of the evsel's struct syscall_tp from @sample as an
 * integer, through the ->integer reader installed for that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint(), but goes through the ->pointer reader
 * and evaluates to the address of the field inside the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
/*
 * What an argument formatter (syscall_arg__scnprintf_*) is handed.
 *
 * @val:    raw value of the argument being formatted.
 * @thread: NOTE(review): presumably the thread that issued the syscall;
 *          not used by the formatters visible here.
 * @trace:  NOTE(review): presumably the owning trace session; confirm.
 * @parm:   formatter specific data; the strarray formatters expect a
 *          struct strarray here.
 * @idx:    position of this argument in the syscall's argument list.
 * @mask:   bitmask indexed by argument position; formatters set bits to
 *          mark other arguments (e.g. open()'s mode when O_CREAT is absent).
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void          *parm;
	u8            idx;
	u8            mask;
};
247
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0].
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int         offset;
	int         nr_entries;
	const char **entries;
};

/* Define strarray__<array> wrapping @array, with values starting at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Define strarray__<array> wrapping @array, with entries[0] naming @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset     = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * Format an argument through the strarray in arg->parm, printing values
 * that have no name in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
/*
 * Format an argument through the strarray in arg->parm, printing values
 * that have no name in hexadecimal ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
/*
 * File descriptor formatter.  Only declared here, so that the formatters
 * and tables that follow can refer to it; the definition is not part of
 * this section of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
/*
 * Formatter for the fd argument of close().  Only declared here, so that
 * the syscall format table can refer to it; the definition is not part of
 * this section of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
/* Format the argument as a "0x"-prefixed hexadecimal number ("%#lx"). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406                                                       struct syscall_arg *arg)
407 {
408         int behavior = arg->val;
409
410         switch (behavior) {
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412         P_MADV_BHV(NORMAL);
413         P_MADV_BHV(RANDOM);
414         P_MADV_BHV(SEQUENTIAL);
415         P_MADV_BHV(WILLNEED);
416         P_MADV_BHV(DONTNEED);
417         P_MADV_BHV(REMOVE);
418         P_MADV_BHV(DONTFORK);
419         P_MADV_BHV(DOFORK);
420         P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422         P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424         P_MADV_BHV(MERGEABLE);
425         P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427         P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430         P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433         P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436         P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439         default: break;
440         }
441
442         return scnprintf(bf, size, "%#x", behavior);
443 }
444
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448                                            struct syscall_arg *arg)
449 {
450         int printed = 0, op = arg->val;
451
452         if (op == 0)
453                 return scnprintf(bf, size, "NONE");
454 #define P_CMD(cmd) \
455         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457                 op &= ~LOCK_##cmd; \
458         }
459
460         P_CMD(SH);
461         P_CMD(EX);
462         P_CMD(NB);
463         P_CMD(UN);
464         P_CMD(MAND);
465         P_CMD(RW);
466         P_CMD(READ);
467         P_CMD(WRITE);
468 #undef P_OP
469
470         if (op)
471                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472
473         return printed;
474 }
475
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480         enum syscall_futex_args {
481                 SCF_UADDR   = (1 << 0),
482                 SCF_OP      = (1 << 1),
483                 SCF_VAL     = (1 << 2),
484                 SCF_TIMEOUT = (1 << 3),
485                 SCF_UADDR2  = (1 << 4),
486                 SCF_VAL3    = (1 << 5),
487         };
488         int op = arg->val;
489         int cmd = op & FUTEX_CMD_MASK;
490         size_t printed = 0;
491
492         switch (cmd) {
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
495         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
498         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
499         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
500         P_FUTEX_OP(WAKE_OP);                                                      break;
501         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
504         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
505         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
506         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
507         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
508         }
509
510         if (op & FUTEX_PRIVATE_FLAG)
511                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
512
513         if (op & FUTEX_CLOCK_REALTIME)
514                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515
516         return printed;
517 }
518
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520
/* epoll_ctl() operations; the table starts at value 1. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* Interval timer names, indexed from 0. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/*
 * Seek "whence" names, indexed from 0.  "DATA" and "HOLE" are only present
 * when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
536
/*
 * fcntl() command names, indexed by command value starting at F_DUPFD (0).
 * All names are spelled without the "F_" prefix so that the output is
 * uniform across the whole table.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
543 static DEFINE_STRARRAY(fcntl_cmds);
544
/* Resource limit names, indexed by resource number starting at 0. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Names for the "how" argument of the signal mask syscalls, from 0. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* Clock id names, indexed by clock id starting at 0. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
560
/*
 * Socket address family names, indexed by the AF_ value starting at
 * AF_UNSPEC (0).
 *
 * The table has to stay dense and in AF_ order: family 28 (AF_MPLS) sits
 * between "IB" (27) and "CAN" (29); leaving it out shifts every later
 * name down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
569 static DEFINE_STRARRAY(socket_families);
570
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576                                                       struct syscall_arg *arg)
577 {
578         size_t printed;
579         int type = arg->val,
580             flags = type & ~SOCK_TYPE_MASK;
581
582         type &= SOCK_TYPE_MASK;
583         /*
584          * Can't use a strarray, MIPS may override for ABI reasons.
585          */
586         switch (type) {
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588         P_SK_TYPE(STREAM);
589         P_SK_TYPE(DGRAM);
590         P_SK_TYPE(RAW);
591         P_SK_TYPE(RDM);
592         P_SK_TYPE(SEQPACKET);
593         P_SK_TYPE(DCCP);
594         P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596         default:
597                 printed = scnprintf(bf, size, "%#x", type);
598         }
599
600 #define P_SK_FLAG(n) \
601         if (flags & SOCK_##n) { \
602                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603                 flags &= ~SOCK_##n; \
604         }
605
606         P_SK_FLAG(CLOEXEC);
607         P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609
610         if (flags)
611                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612
613         return printed;
614 }
615
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617
618 #ifndef MSG_PROBE
619 #define MSG_PROBE            0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE  0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN         0x20000000
629 #endif
630
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632                                                struct syscall_arg *arg)
633 {
634         int printed = 0, flags = arg->val;
635
636         if (flags == 0)
637                 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639         if (flags & MSG_##n) { \
640                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641                 flags &= ~MSG_##n; \
642         }
643
644         P_MSG_FLAG(OOB);
645         P_MSG_FLAG(PEEK);
646         P_MSG_FLAG(DONTROUTE);
647         P_MSG_FLAG(TRYHARD);
648         P_MSG_FLAG(CTRUNC);
649         P_MSG_FLAG(PROBE);
650         P_MSG_FLAG(TRUNC);
651         P_MSG_FLAG(DONTWAIT);
652         P_MSG_FLAG(EOR);
653         P_MSG_FLAG(WAITALL);
654         P_MSG_FLAG(FIN);
655         P_MSG_FLAG(SYN);
656         P_MSG_FLAG(CONFIRM);
657         P_MSG_FLAG(RST);
658         P_MSG_FLAG(ERRQUEUE);
659         P_MSG_FLAG(NOSIGNAL);
660         P_MSG_FLAG(MORE);
661         P_MSG_FLAG(WAITFORONE);
662         P_MSG_FLAG(SENDPAGE_NOTLAST);
663         P_MSG_FLAG(FASTOPEN);
664         P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666
667         if (flags)
668                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669
670         return printed;
671 }
672
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676                                                  struct syscall_arg *arg)
677 {
678         size_t printed = 0;
679         int mode = arg->val;
680
681         if (mode == F_OK) /* 0 */
682                 return scnprintf(bf, size, "F");
683 #define P_MODE(n) \
684         if (mode & n##_OK) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686                 mode &= ~n##_OK; \
687         }
688
689         P_MODE(R);
690         P_MODE(W);
691         P_MODE(X);
692 #undef P_MODE
693
694         if (mode)
695                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696
697         return printed;
698 }
699
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703                                                struct syscall_arg *arg)
704 {
705         int printed = 0, flags = arg->val;
706
707         if (!(flags & O_CREAT))
708                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709
710         if (flags == 0)
711                 return scnprintf(bf, size, "RDONLY");
712 #define P_FLAG(n) \
713         if (flags & O_##n) { \
714                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715                 flags &= ~O_##n; \
716         }
717
718         P_FLAG(APPEND);
719         P_FLAG(ASYNC);
720         P_FLAG(CLOEXEC);
721         P_FLAG(CREAT);
722         P_FLAG(DIRECT);
723         P_FLAG(DIRECTORY);
724         P_FLAG(EXCL);
725         P_FLAG(LARGEFILE);
726         P_FLAG(NOATIME);
727         P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729         P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731         P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734         P_FLAG(PATH);
735 #endif
736         P_FLAG(RDWR);
737 #ifdef O_DSYNC
738         if ((flags & O_SYNC) == O_SYNC)
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740         else {
741                 P_FLAG(DSYNC);
742         }
743 #else
744         P_FLAG(SYNC);
745 #endif
746         P_FLAG(TRUNC);
747         P_FLAG(WRONLY);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759                                                    struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763         if (flags == 0)
764                 return scnprintf(bf, size, "NONE");
765 #define P_FLAG(n) \
766         if (flags & EFD_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~EFD_##n; \
769         }
770
771         P_FLAG(SEMAPHORE);
772         P_FLAG(CLOEXEC);
773         P_FLAG(NONBLOCK);
774 #undef P_FLAG
775
776         if (flags)
777                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778
779         return printed;
780 }
781
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785                                                 struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789 #define P_FLAG(n) \
790         if (flags & O_##n) { \
791                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792                 flags &= ~O_##n; \
793         }
794
795         P_FLAG(CLOEXEC);
796         P_FLAG(NONBLOCK);
797 #undef P_FLAG
798
799         if (flags)
800                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801
802         return printed;
803 }
804
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809         int sig = arg->val;
810
811         switch (sig) {
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813         P_SIGNUM(HUP);
814         P_SIGNUM(INT);
815         P_SIGNUM(QUIT);
816         P_SIGNUM(ILL);
817         P_SIGNUM(TRAP);
818         P_SIGNUM(ABRT);
819         P_SIGNUM(BUS);
820         P_SIGNUM(FPE);
821         P_SIGNUM(KILL);
822         P_SIGNUM(USR1);
823         P_SIGNUM(SEGV);
824         P_SIGNUM(USR2);
825         P_SIGNUM(PIPE);
826         P_SIGNUM(ALRM);
827         P_SIGNUM(TERM);
828         P_SIGNUM(CHLD);
829         P_SIGNUM(CONT);
830         P_SIGNUM(STOP);
831         P_SIGNUM(TSTP);
832         P_SIGNUM(TTIN);
833         P_SIGNUM(TTOU);
834         P_SIGNUM(URG);
835         P_SIGNUM(XCPU);
836         P_SIGNUM(XFSZ);
837         P_SIGNUM(VTALRM);
838         P_SIGNUM(PROF);
839         P_SIGNUM(WINCH);
840         P_SIGNUM(IO);
841         P_SIGNUM(PWR);
842         P_SIGNUM(SYS);
843 #ifdef SIGEMT
844         P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847         P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850         P_SIGNUM(SWI);
851 #endif
852         default: break;
853         }
854
855         return scnprintf(bf, size, "%#x", sig);
856 }
857
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
#define TCGETS		0x5401

/*
 * Names of the tty ioctls, indexed by (cmd - TCGETS).
 *
 * The numbering has gaps, so the entries that follow a gap carry an explicit
 * index.  Those indices are written as "cmd - TCGETS" to keep them in sync
 * with the base the table is looked up with; using the low byte of the
 * command directly would place every such entry one slot too far.
 * Slots inside the gaps are left NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
883
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg through SCA_STRARRAY, using strarray__<array> as its table.
 * @name is not used by the expansion; it only documents the argument's name
 * at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
890
891 static struct syscall_fmt {
892         const char *name;
893         const char *alias;
894         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
895         void       *arg_parm[6];
896         bool       errmsg;
897         bool       timeout;
898         bool       hexret;
899 } syscall_fmts[] = {
900         { .name     = "access",     .errmsg = true,
901           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
903         { .name     = "brk",        .hexret = true,
904           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
906         { .name     = "close",      .errmsg = true,
907           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
908         { .name     = "connect",    .errmsg = true, },
909         { .name     = "dup",        .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
911         { .name     = "dup2",       .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
913         { .name     = "dup3",       .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
915         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916         { .name     = "eventfd2",   .errmsg = true,
917           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918         { .name     = "faccessat",  .errmsg = true,
919           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920         { .name     = "fadvise64",  .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
922         { .name     = "fallocate",  .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fchdir",     .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "fchmod",     .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "fchmodat",   .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
930         { .name     = "fchown",     .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "fchownat",   .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
934         { .name     = "fcntl",      .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */
936                              [1] = SCA_STRARRAY, /* cmd */ },
937           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938         { .name     = "fdatasync",  .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "flock",      .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_FD, /* fd */
942                              [1] = SCA_FLOCK, /* cmd */ }, },
943         { .name     = "fsetxattr",  .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
945         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
946           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
947         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
948           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
949         { .name     = "fstatfs",    .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
951         { .name     = "fsync",    .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
953         { .name     = "ftruncate", .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
955         { .name     = "futex",      .errmsg = true,
956           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957         { .name     = "futimesat", .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
959         { .name     = "getdents",   .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
961         { .name     = "getdents64", .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
963         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
964         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
965         { .name     = "ioctl",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
967 #if defined(__i386__) || defined(__x86_64__)
968 /*
969  * FIXME: Make this available to all arches.
970  */
971                              [1] = SCA_STRHEXARRAY, /* cmd */
972                              [2] = SCA_HEX, /* arg */ },
973           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
974 #else
975                              [2] = SCA_HEX, /* arg */ }, },
976 #endif
977         { .name     = "kill",       .errmsg = true,
978           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979         { .name     = "linkat",     .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
981         { .name     = "lseek",      .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */
983                              [2] = SCA_STRARRAY, /* whence */ },
984           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
985         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
986         { .name     = "madvise",    .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
988                              [2] = SCA_MADV_BHV, /* behavior */ }, },
989         { .name     = "mkdirat",    .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
991         { .name     = "mknodat",    .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
993         { .name     = "mlock",      .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995         { .name     = "mlockall",   .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
997         { .name     = "mmap",       .hexret = true,
998           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
999                              [2] = SCA_MMAP_PROT, /* prot */
1000                              [3] = SCA_MMAP_FLAGS, /* flags */
1001                              [4] = SCA_FD,        /* fd */ }, },
1002         { .name     = "mprotect",   .errmsg = true,
1003           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1005         { .name     = "mremap",     .hexret = true,
1006           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007                              [4] = SCA_HEX, /* new_addr */ }, },
1008         { .name     = "munlock",    .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010         { .name     = "munmap",     .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012         { .name     = "name_to_handle_at", .errmsg = true,
1013           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1014         { .name     = "newfstatat", .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1016         { .name     = "open",       .errmsg = true,
1017           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018         { .name     = "open_by_handle_at", .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021         { .name     = "openat",     .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024         { .name     = "pipe2",      .errmsg = true,
1025           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026         { .name     = "poll",       .errmsg = true, .timeout = true, },
1027         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1028         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1029           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1030         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1031           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1032         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035         { .name     = "pwritev",    .errmsg = true,
1036           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1037         { .name     = "read",       .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1039         { .name     = "readlinkat", .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1041         { .name     = "readv",      .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1043         { .name     = "recvfrom",   .errmsg = true,
1044           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045         { .name     = "recvmmsg",   .errmsg = true,
1046           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047         { .name     = "recvmsg",    .errmsg = true,
1048           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049         { .name     = "renameat",   .errmsg = true,
1050           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1051         { .name     = "rt_sigaction", .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054         { .name     = "rt_sigqueueinfo", .errmsg = true,
1055           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1057           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058         { .name     = "select",     .errmsg = true, .timeout = true, },
1059         { .name     = "sendmmsg",    .errmsg = true,
1060           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061         { .name     = "sendmsg",    .errmsg = true,
1062           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063         { .name     = "sendto",     .errmsg = true,
1064           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067         { .name     = "shutdown",   .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1069         { .name     = "socket",     .errmsg = true,
1070           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071                              [1] = SCA_SK_TYPE, /* type */ },
1072           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1073         { .name     = "socketpair", .errmsg = true,
1074           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075                              [1] = SCA_SK_TYPE, /* type */ },
1076           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1077         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1078         { .name     = "symlinkat",  .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1080         { .name     = "tgkill",     .errmsg = true,
1081           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "tkill",      .errmsg = true,
1083           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1085         { .name     = "unlinkat",   .errmsg = true,
1086           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087         { .name     = "utimensat",  .errmsg = true,
1088           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089         { .name     = "write",      .errmsg = true,
1090           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1091         { .name     = "writev",     .errmsg = true,
1092           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1093 };
1094
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097         const struct syscall_fmt *fmt = fmtp;
1098         return strcmp(name, fmt->name);
1099 }
1100
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103         const int nmemb = ARRAY_SIZE(syscall_fmts);
1104         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106
/*
 * One slot of trace->syscalls.table, indexed by syscall id and filled in
 * lazily by trace__read_syscall_info().
 */
struct syscall {
        /* format of the sys_enter_<name> (or alias) tracepoint */
        struct event_format *tp_format;
        /* syscall name; NULL means this slot was not read yet */
        const char *name;
        /* true when the event qualifier excludes this syscall */
        bool filtered;
        /* matching syscall_fmts[] entry, NULL if there is none */
        struct syscall_fmt *fmt;
        /* per-argument formatters, set up by syscall__set_arg_fmts() */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameters, borrowed from fmt->arg_parm */
        void **arg_parm;
};
1115
1116 static size_t fprintf_duration(unsigned long t, FILE *fp)
1117 {
1118         double duration = (double)t / NSEC_PER_MSEC;
1119         size_t printed = fprintf(fp, "(");
1120
1121         if (duration >= 1.0)
1122                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1123         else if (duration >= 0.01)
1124                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1125         else
1126                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1127         return printed + fprintf(fp, "): ");
1128 }
1129
1130 struct thread_trace {
1131         u64               entry_time;
1132         u64               exit_time;
1133         bool              entry_pending;
1134         unsigned long     nr_events;
1135         char              *entry_str;
1136         double            runtime_ms;
1137         struct {
1138                 int       max;
1139                 char      **table;
1140         } paths;
1141
1142         struct intlist *syscall_stats;
1143 };
1144
1145 static struct thread_trace *thread_trace__new(void)
1146 {
1147         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1148
1149         if (ttrace)
1150                 ttrace->paths.max = -1;
1151
1152         ttrace->syscall_stats = intlist__new(NULL);
1153
1154         return ttrace;
1155 }
1156
1157 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1158 {
1159         struct thread_trace *ttrace;
1160
1161         if (thread == NULL)
1162                 goto fail;
1163
1164         if (thread->priv == NULL)
1165                 thread->priv = thread_trace__new();
1166                 
1167         if (thread->priv == NULL)
1168                 goto fail;
1169
1170         ttrace = thread->priv;
1171         ++ttrace->nr_events;
1172
1173         return ttrace;
1174 fail:
1175         color_fprintf(fp, PERF_COLOR_RED,
1176                       "WARNING: not enough memory, dropping samples!\n");
1177         return NULL;
1178 }
1179
1180 struct trace {
1181         struct perf_tool        tool;
1182         struct {
1183                 int             machine;
1184                 int             open_id;
1185         }                       audit;
1186         struct {
1187                 int             max;
1188                 struct syscall  *table;
1189         } syscalls;
1190         struct record_opts      opts;
1191         struct machine          *host;
1192         u64                     base_time;
1193         FILE                    *output;
1194         unsigned long           nr_events;
1195         struct strlist          *ev_qualifier;
1196         const char              *last_vfs_getname;
1197         struct intlist          *tid_list;
1198         struct intlist          *pid_list;
1199         double                  duration_filter;
1200         double                  runtime_ms;
1201         struct {
1202                 u64             vfs_getname,
1203                                 proc_getname;
1204         } stats;
1205         bool                    not_ev_qualifier;
1206         bool                    live;
1207         bool                    full_time;
1208         bool                    sched;
1209         bool                    multiple_threads;
1210         bool                    summary;
1211         bool                    summary_only;
1212         bool                    show_comm;
1213         bool                    show_tool_stats;
1214 };
1215
1216 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1217 {
1218         struct thread_trace *ttrace = thread->priv;
1219
1220         if (fd > ttrace->paths.max) {
1221                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1222
1223                 if (npath == NULL)
1224                         return -1;
1225
1226                 if (ttrace->paths.max != -1) {
1227                         memset(npath + ttrace->paths.max + 1, 0,
1228                                (fd - ttrace->paths.max) * sizeof(char *));
1229                 } else {
1230                         memset(npath, 0, (fd + 1) * sizeof(char *));
1231                 }
1232
1233                 ttrace->paths.table = npath;
1234                 ttrace->paths.max   = fd;
1235         }
1236
1237         ttrace->paths.table[fd] = strdup(pathname);
1238
1239         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1240 }
1241
1242 static int thread__read_fd_path(struct thread *thread, int fd)
1243 {
1244         char linkname[PATH_MAX], pathname[PATH_MAX];
1245         struct stat st;
1246         int ret;
1247
1248         if (thread->pid_ == thread->tid) {
1249                 scnprintf(linkname, sizeof(linkname),
1250                           "/proc/%d/fd/%d", thread->pid_, fd);
1251         } else {
1252                 scnprintf(linkname, sizeof(linkname),
1253                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1254         }
1255
1256         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1257                 return -1;
1258
1259         ret = readlink(linkname, pathname, sizeof(pathname));
1260
1261         if (ret < 0 || ret > st.st_size)
1262                 return -1;
1263
1264         pathname[ret] = '\0';
1265         return trace__set_fd_pathname(thread, fd, pathname);
1266 }
1267
1268 static const char *thread__fd_path(struct thread *thread, int fd,
1269                                    struct trace *trace)
1270 {
1271         struct thread_trace *ttrace = thread->priv;
1272
1273         if (ttrace == NULL)
1274                 return NULL;
1275
1276         if (fd < 0)
1277                 return NULL;
1278
1279         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1280                 if (!trace->live)
1281                         return NULL;
1282                 ++trace->stats.proc_getname;
1283                 if (thread__read_fd_path(thread, fd)) {
1284                         return NULL;
1285         }
1286
1287         return ttrace->paths.table[fd];
1288 }
1289
1290 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1291                                         struct syscall_arg *arg)
1292 {
1293         int fd = arg->val;
1294         size_t printed = scnprintf(bf, size, "%d", fd);
1295         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1296
1297         if (path)
1298                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1299
1300         return printed;
1301 }
1302
1303 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1304                                               struct syscall_arg *arg)
1305 {
1306         int fd = arg->val;
1307         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1308         struct thread_trace *ttrace = arg->thread->priv;
1309
1310         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1311                 zfree(&ttrace->paths.table[fd]);
1312
1313         return printed;
1314 }
1315
1316 static bool trace__filter_duration(struct trace *trace, double t)
1317 {
1318         return t < (trace->duration_filter * NSEC_PER_MSEC);
1319 }
1320
1321 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1322 {
1323         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1324
1325         return fprintf(fp, "%10.3f ", ts);
1326 }
1327
/*
 * Flags written from the signal handler below.  They are volatile
 * sig_atomic_t because that is the only object type that may portably be
 * written from an asynchronous signal handler and read by the main flow.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination, and remember whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1336
1337 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1338                                         u64 duration, u64 tstamp, FILE *fp)
1339 {
1340         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1341         printed += fprintf_duration(duration, fp);
1342
1343         if (trace->multiple_threads) {
1344                 if (trace->show_comm)
1345                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1346                 printed += fprintf(fp, "%d ", thread->tid);
1347         }
1348
1349         return printed;
1350 }
1351
1352 static int trace__process_event(struct trace *trace, struct machine *machine,
1353                                 union perf_event *event, struct perf_sample *sample)
1354 {
1355         int ret = 0;
1356
1357         switch (event->header.type) {
1358         case PERF_RECORD_LOST:
1359                 color_fprintf(trace->output, PERF_COLOR_RED,
1360                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1361                 ret = machine__process_lost_event(machine, event, sample);
1362         default:
1363                 ret = machine__process_event(machine, event, sample);
1364                 break;
1365         }
1366
1367         return ret;
1368 }
1369
1370 static int trace__tool_process(struct perf_tool *tool,
1371                                union perf_event *event,
1372                                struct perf_sample *sample,
1373                                struct machine *machine)
1374 {
1375         struct trace *trace = container_of(tool, struct trace, tool);
1376         return trace__process_event(trace, machine, event, sample);
1377 }
1378
1379 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1380 {
1381         int err = symbol__init();
1382
1383         if (err)
1384                 return err;
1385
1386         trace->host = machine__new_host();
1387         if (trace->host == NULL)
1388                 return -ENOMEM;
1389
1390         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1391                                             evlist->threads, trace__tool_process, false);
1392         if (err)
1393                 symbol__exit();
1394
1395         return err;
1396 }
1397
1398 static int syscall__set_arg_fmts(struct syscall *sc)
1399 {
1400         struct format_field *field;
1401         int idx = 0;
1402
1403         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1404         if (sc->arg_scnprintf == NULL)
1405                 return -1;
1406
1407         if (sc->fmt)
1408                 sc->arg_parm = sc->fmt->arg_parm;
1409
1410         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1411                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1412                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1413                 else if (field->flags & FIELD_IS_POINTER)
1414                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1415                 ++idx;
1416         }
1417
1418         return 0;
1419 }
1420
1421 static int trace__read_syscall_info(struct trace *trace, int id)
1422 {
1423         char tp_name[128];
1424         struct syscall *sc;
1425         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1426
1427         if (name == NULL)
1428                 return -1;
1429
1430         if (id > trace->syscalls.max) {
1431                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1432
1433                 if (nsyscalls == NULL)
1434                         return -1;
1435
1436                 if (trace->syscalls.max != -1) {
1437                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1438                                (id - trace->syscalls.max) * sizeof(*sc));
1439                 } else {
1440                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1441                 }
1442
1443                 trace->syscalls.table = nsyscalls;
1444                 trace->syscalls.max   = id;
1445         }
1446
1447         sc = trace->syscalls.table + id;
1448         sc->name = name;
1449
1450         if (trace->ev_qualifier) {
1451                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1452
1453                 if (!(in ^ trace->not_ev_qualifier)) {
1454                         sc->filtered = true;
1455                         /*
1456                          * No need to do read tracepoint information since this will be
1457                          * filtered out.
1458                          */
1459                         return 0;
1460                 }
1461         }
1462
1463         sc->fmt  = syscall_fmt__find(sc->name);
1464
1465         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1466         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1467
1468         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1469                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1470                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1471         }
1472
1473         if (sc->tp_format == NULL)
1474                 return -1;
1475
1476         return syscall__set_arg_fmts(sc);
1477 }
1478
1479 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1480                                       unsigned long *args, struct trace *trace,
1481                                       struct thread *thread)
1482 {
1483         size_t printed = 0;
1484
1485         if (sc->tp_format != NULL) {
1486                 struct format_field *field;
1487                 u8 bit = 1;
1488                 struct syscall_arg arg = {
1489                         .idx    = 0,
1490                         .mask   = 0,
1491                         .trace  = trace,
1492                         .thread = thread,
1493                 };
1494
1495                 for (field = sc->tp_format->format.fields->next; field;
1496                      field = field->next, ++arg.idx, bit <<= 1) {
1497                         if (arg.mask & bit)
1498                                 continue;
1499                         /*
1500                          * Suppress this argument if its value is zero and
1501                          * and we don't have a string associated in an
1502                          * strarray for it.
1503                          */
1504                         if (args[arg.idx] == 0 &&
1505                             !(sc->arg_scnprintf &&
1506                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1507                               sc->arg_parm[arg.idx]))
1508                                 continue;
1509
1510                         printed += scnprintf(bf + printed, size - printed,
1511                                              "%s%s: ", printed ? ", " : "", field->name);
1512                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1513                                 arg.val = args[arg.idx];
1514                                 if (sc->arg_parm)
1515                                         arg.parm = sc->arg_parm[arg.idx];
1516                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1517                                                                       size - printed, &arg);
1518                         } else {
1519                                 printed += scnprintf(bf + printed, size - printed,
1520                                                      "%ld", args[arg.idx]);
1521                         }
1522                 }
1523         } else {
1524                 int i = 0;
1525
1526                 while (i < 6) {
1527                         printed += scnprintf(bf + printed, size - printed,
1528                                              "%sarg%d: %ld",
1529                                              printed ? ", " : "", i, args[i]);
1530                         ++i;
1531                 }
1532         }
1533
1534         return printed;
1535 }
1536
/*
 * Signature shared by the per-tracepoint sample handlers, such as
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1539
1540 static struct syscall *trace__syscall_info(struct trace *trace,
1541                                            struct perf_evsel *evsel, int id)
1542 {
1543
1544         if (id < 0) {
1545
1546                 /*
1547                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1548                  * before that, leaving at a higher verbosity level till that is
1549                  * explained. Reproduced with plain ftrace with:
1550                  *
1551                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1552                  * grep "NR -1 " /t/trace_pipe
1553                  *
1554                  * After generating some load on the machine.
1555                  */
1556                 if (verbose > 1) {
1557                         static u64 n;
1558                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1559                                 id, perf_evsel__name(evsel), ++n);
1560                 }
1561                 return NULL;
1562         }
1563
1564         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1565             trace__read_syscall_info(trace, id))
1566                 goto out_cant_read;
1567
1568         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1569                 goto out_cant_read;
1570
1571         return &trace->syscalls.table[id];
1572
1573 out_cant_read:
1574         if (verbose) {
1575                 fprintf(trace->output, "Problems reading syscall %d", id);
1576                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1577                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1578                 fputs(" information\n", trace->output);
1579         }
1580         return NULL;
1581 }
1582
1583 static void thread__update_stats(struct thread_trace *ttrace,
1584                                  int id, struct perf_sample *sample)
1585 {
1586         struct int_node *inode;
1587         struct stats *stats;
1588         u64 duration = 0;
1589
1590         inode = intlist__findnew(ttrace->syscall_stats, id);
1591         if (inode == NULL)
1592                 return;
1593
1594         stats = inode->priv;
1595         if (stats == NULL) {
1596                 stats = malloc(sizeof(struct stats));
1597                 if (stats == NULL)
1598                         return;
1599                 init_stats(stats);
1600                 inode->priv = stats;
1601         }
1602
1603         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1604                 duration = sample->time - ttrace->entry_time;
1605
1606         update_stats(stats, duration);
1607 }
1608
1609 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1610                             struct perf_sample *sample)
1611 {
1612         char *msg;
1613         void *args;
1614         size_t printed = 0;
1615         struct thread *thread;
1616         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1617         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1618         struct thread_trace *ttrace;
1619
1620         if (sc == NULL)
1621                 return -1;
1622
1623         if (sc->filtered)
1624                 return 0;
1625
1626         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1627         ttrace = thread__trace(thread, trace->output);
1628         if (ttrace == NULL)
1629                 return -1;
1630
1631         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1632         ttrace = thread->priv;
1633
1634         if (ttrace->entry_str == NULL) {
1635                 ttrace->entry_str = malloc(1024);
1636                 if (!ttrace->entry_str)
1637                         return -1;
1638         }
1639
1640         ttrace->entry_time = sample->time;
1641         msg = ttrace->entry_str;
1642         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1643
1644         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1645                                            args, trace, thread);
1646
1647         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1648                 if (!trace->duration_filter && !trace->summary_only) {
1649                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1650                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1651                 }
1652         } else
1653                 ttrace->entry_pending = true;
1654
1655         return 0;
1656 }
1657
1658 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1659                            struct perf_sample *sample)
1660 {
1661         int ret;
1662         u64 duration = 0;
1663         struct thread *thread;
1664         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1665         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1666         struct thread_trace *ttrace;
1667
1668         if (sc == NULL)
1669                 return -1;
1670
1671         if (sc->filtered)
1672                 return 0;
1673
1674         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1675         ttrace = thread__trace(thread, trace->output);
1676         if (ttrace == NULL)
1677                 return -1;
1678
1679         if (trace->summary)
1680                 thread__update_stats(ttrace, id, sample);
1681
1682         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1683
1684         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1685                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1686                 trace->last_vfs_getname = NULL;
1687                 ++trace->stats.vfs_getname;
1688         }
1689
1690         ttrace = thread->priv;
1691
1692         ttrace->exit_time = sample->time;
1693
1694         if (ttrace->entry_time) {
1695                 duration = sample->time - ttrace->entry_time;
1696                 if (trace__filter_duration(trace, duration))
1697                         goto out;
1698         } else if (trace->duration_filter)
1699                 goto out;
1700
1701         if (trace->summary_only)
1702                 goto out;
1703
1704         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1705
1706         if (ttrace->entry_pending) {
1707                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1708         } else {
1709                 fprintf(trace->output, " ... [");
1710                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1711                 fprintf(trace->output, "]: %s()", sc->name);
1712         }
1713
1714         if (sc->fmt == NULL) {
1715 signed_print:
1716                 fprintf(trace->output, ") = %d", ret);
1717         } else if (ret < 0 && sc->fmt->errmsg) {
1718                 char bf[256];
1719                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1720                            *e = audit_errno_to_name(-ret);
1721
1722                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1723         } else if (ret == 0 && sc->fmt->timeout)
1724                 fprintf(trace->output, ") = 0 Timeout");
1725         else if (sc->fmt->hexret)
1726                 fprintf(trace->output, ") = %#x", ret);
1727         else
1728                 goto signed_print;
1729
1730         fputc('\n', trace->output);
1731 out:
1732         ttrace->entry_pending = false;
1733
1734         return 0;
1735 }
1736
1737 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1738                               struct perf_sample *sample)
1739 {
1740         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1741         return 0;
1742 }
1743
1744 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1745                                      struct perf_sample *sample)
1746 {
1747         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1748         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1749         struct thread *thread = machine__findnew_thread(trace->host,
1750                                                         sample->pid,
1751                                                         sample->tid);
1752         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1753
1754         if (ttrace == NULL)
1755                 goto out_dump;
1756
1757         ttrace->runtime_ms += runtime_ms;
1758         trace->runtime_ms += runtime_ms;
1759         return 0;
1760
1761 out_dump:
1762         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1763                evsel->name,
1764                perf_evsel__strval(evsel, sample, "comm"),
1765                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1766                runtime,
1767                perf_evsel__intval(evsel, sample, "vruntime"));
1768         return 0;
1769 }
1770
1771 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1772 {
1773         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1774             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1775                 return false;
1776
1777         if (trace->pid_list || trace->tid_list)
1778                 return true;
1779
1780         return false;
1781 }
1782
1783 static int trace__process_sample(struct perf_tool *tool,
1784                                  union perf_event *event __maybe_unused,
1785                                  struct perf_sample *sample,
1786                                  struct perf_evsel *evsel,
1787                                  struct machine *machine __maybe_unused)
1788 {
1789         struct trace *trace = container_of(tool, struct trace, tool);
1790         int err = 0;
1791
1792         tracepoint_handler handler = evsel->handler;
1793
1794         if (skip_sample(trace, sample))
1795                 return 0;
1796
1797         if (!trace->full_time && trace->base_time == 0)
1798                 trace->base_time = sample->time;
1799
1800         if (handler) {
1801                 ++trace->nr_events;
1802                 handler(trace, evsel, sample);
1803         }
1804
1805         return err;
1806 }
1807
1808 static int parse_target_str(struct trace *trace)
1809 {
1810         if (trace->opts.target.pid) {
1811                 trace->pid_list = intlist__new(trace->opts.target.pid);
1812                 if (trace->pid_list == NULL) {
1813                         pr_err("Error parsing process id string\n");
1814                         return -EINVAL;
1815                 }
1816         }
1817
1818         if (trace->opts.target.tid) {
1819                 trace->tid_list = intlist__new(trace->opts.target.tid);
1820                 if (trace->tid_list == NULL) {
1821                         pr_err("Error parsing thread id string\n");
1822                         return -EINVAL;
1823                 }
1824         }
1825
1826         return 0;
1827 }
1828
/*
 * Implements "perf trace record": builds an argv for cmd_record() consisting
 * of a fixed option prefix, the syscall tracepoint event string and the
 * caller's remaining arguments, then runs it.
 *
 * The event string is resolved before the argument vector is allocated so
 * that the "no usable tracepoint" failure path has nothing to release (it
 * used to leak the freshly allocated vector).
 *
 * Returns cmd_record()'s result, -1 if neither tracepoint family exists, or
 * -ENOMEM if the argument vector cannot be allocated.
 */
static int trace__record(int argc, const char **argv)
{
        unsigned int rec_argc, i, j;
        const char **rec_argv;
        const char *events;
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e",
        };

        /* event string may be different for older kernels - e.g., RHEL6 */
        if (is_valid_tracepoint("raw_syscalls:sys_enter"))
                events = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
        else if (is_valid_tracepoint("syscalls:sys_enter"))
                events = "syscalls:sys_enter,syscalls:sys_exit";
        else {
                pr_err("Neither raw_syscalls nor syscalls events exist.\n");
                return -1;
        }

        /* +1 is for the event string */
        rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = record_args[i];

        rec_argv[i++] = events;

        for (j = 0; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        /*
         * rec_argv is deliberately not freed after the call: whether
         * cmd_record() keeps references to it is not visible from here.
         */
        return cmd_record(i, rec_argv, NULL);
}
1867
1868 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1869
1870 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1871 {
1872         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1873         if (evsel == NULL)
1874                 return;
1875
1876         if (perf_evsel__field(evsel, "pathname") == NULL) {
1877                 perf_evsel__delete(evsel);
1878                 return;
1879         }
1880
1881         evsel->handler = trace__vfs_getname;
1882         perf_evlist__add(evlist, evsel);
1883 }
1884
1885 static int trace__run(struct trace *trace, int argc, const char **argv)
1886 {
1887         struct perf_evlist *evlist = perf_evlist__new();
1888         struct perf_evsel *evsel;
1889         int err = -1, i;
1890         unsigned long before;
1891         const bool forks = argc > 0;
1892
1893         trace->live = true;
1894
1895         if (evlist == NULL) {
1896                 fprintf(trace->output, "Not enough memory to run!\n");
1897                 goto out;
1898         }
1899
1900         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1901                 goto out_error_tp;
1902
1903         perf_evlist__add_vfs_getname(evlist);
1904
1905         if (trace->sched &&
1906                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1907                                 trace__sched_stat_runtime))
1908                 goto out_error_tp;
1909
1910         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1911         if (err < 0) {
1912                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1913                 goto out_delete_evlist;
1914         }
1915
1916         err = trace__symbols_init(trace, evlist);
1917         if (err < 0) {
1918                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1919                 goto out_delete_evlist;
1920         }
1921
1922         perf_evlist__config(evlist, &trace->opts);
1923
1924         signal(SIGCHLD, sig_handler);
1925         signal(SIGINT, sig_handler);
1926
1927         if (forks) {
1928                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1929                                                     argv, false, NULL);
1930                 if (err < 0) {
1931                         fprintf(trace->output, "Couldn't run the workload!\n");
1932                         goto out_delete_evlist;
1933                 }
1934         }
1935
1936         err = perf_evlist__open(evlist);
1937         if (err < 0)
1938                 goto out_error_open;
1939
1940         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1941         if (err < 0) {
1942                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1943                 goto out_delete_evlist;
1944         }
1945
1946         perf_evlist__enable(evlist);
1947
1948         if (forks)
1949                 perf_evlist__start_workload(evlist);
1950
1951         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1952 again:
1953         before = trace->nr_events;
1954
1955         for (i = 0; i < evlist->nr_mmaps; i++) {
1956                 union perf_event *event;
1957
1958                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1959                         const u32 type = event->header.type;
1960                         tracepoint_handler handler;
1961                         struct perf_sample sample;
1962
1963                         ++trace->nr_events;
1964
1965                         err = perf_evlist__parse_sample(evlist, event, &sample);
1966                         if (err) {
1967                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1968                                 goto next_event;
1969                         }
1970
1971                         if (!trace->full_time && trace->base_time == 0)
1972                                 trace->base_time = sample.time;
1973
1974                         if (type != PERF_RECORD_SAMPLE) {
1975                                 trace__process_event(trace, trace->host, event, &sample);
1976                                 continue;
1977                         }
1978
1979                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1980                         if (evsel == NULL) {
1981                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1982                                 goto next_event;
1983                         }
1984
1985                         if (sample.raw_data == NULL) {
1986                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1987                                        perf_evsel__name(evsel), sample.tid,
1988                                        sample.cpu, sample.raw_size);
1989                                 goto next_event;
1990                         }
1991
1992                         handler = evsel->handler;
1993                         handler(trace, evsel, &sample);
1994 next_event:
1995                         perf_evlist__mmap_consume(evlist, i);
1996
1997                         if (interrupted)
1998                                 goto out_disable;
1999                 }
2000         }
2001
2002         if (trace->nr_events == before) {
2003                 int timeout = done ? 100 : -1;
2004
2005                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2006                         goto again;
2007         } else {
2008                 goto again;
2009         }
2010
2011 out_disable:
2012         perf_evlist__disable(evlist);
2013
2014         if (!err) {
2015                 if (trace->summary)
2016                         trace__fprintf_thread_summary(trace, trace->output);
2017
2018                 if (trace->show_tool_stats) {
2019                         fprintf(trace->output, "Stats:\n "
2020                                                " vfs_getname : %" PRIu64 "\n"
2021                                                " proc_getname: %" PRIu64 "\n",
2022                                 trace->stats.vfs_getname,
2023                                 trace->stats.proc_getname);
2024                 }
2025         }
2026
2027 out_delete_evlist:
2028         perf_evlist__delete(evlist);
2029 out:
2030         trace->live = false;
2031         return err;
2032 {
2033         char errbuf[BUFSIZ];
2034
2035 out_error_tp:
2036         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2037         goto out_error;
2038
2039 out_error_open:
2040         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2041
2042 out_error:
2043         fprintf(trace->output, "%s\n", errbuf);
2044         goto out_delete_evlist;
2045 }
2046 }
2047
2048 static int trace__replay(struct trace *trace)
2049 {
2050         const struct perf_evsel_str_handler handlers[] = {
2051                 { "probe:vfs_getname",       trace__vfs_getname, },
2052         };
2053         struct perf_data_file file = {
2054                 .path  = input_name,
2055                 .mode  = PERF_DATA_MODE_READ,
2056         };
2057         struct perf_session *session;
2058         struct perf_evsel *evsel;
2059         int err = -1;
2060
2061         trace->tool.sample        = trace__process_sample;
2062         trace->tool.mmap          = perf_event__process_mmap;
2063         trace->tool.mmap2         = perf_event__process_mmap2;
2064         trace->tool.comm          = perf_event__process_comm;
2065         trace->tool.exit          = perf_event__process_exit;
2066         trace->tool.fork          = perf_event__process_fork;
2067         trace->tool.attr          = perf_event__process_attr;
2068         trace->tool.tracing_data = perf_event__process_tracing_data;
2069         trace->tool.build_id      = perf_event__process_build_id;
2070
2071         trace->tool.ordered_samples = true;
2072         trace->tool.ordering_requires_timestamps = true;
2073
2074         /* add tid to output */
2075         trace->multiple_threads = true;
2076
2077         if (symbol__init() < 0)
2078                 return -1;
2079
2080         session = perf_session__new(&file, false, &trace->tool);
2081         if (session == NULL)
2082                 return -ENOMEM;
2083
2084         trace->host = &session->machines.host;
2085
2086         err = perf_session__set_tracepoints_handlers(session, handlers);
2087         if (err)
2088                 goto out;
2089
2090         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2091                                                      "raw_syscalls:sys_enter");
2092         /* older kernels have syscalls tp versus raw_syscalls */
2093         if (evsel == NULL)
2094                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2095                                                              "syscalls:sys_enter");
2096         if (evsel == NULL) {
2097                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2098                 goto out;
2099         }
2100
2101         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2102             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2103                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2104                 goto out;
2105         }
2106
2107         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2108                                                      "raw_syscalls:sys_exit");
2109         if (evsel == NULL)
2110                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2111                                                              "syscalls:sys_exit");
2112         if (evsel == NULL) {
2113                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2114                 goto out;
2115         }
2116
2117         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2118             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2119                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2120                 goto out;
2121         }
2122
2123         err = parse_target_str(trace);
2124         if (err != 0)
2125                 goto out;
2126
2127         setup_pager();
2128
2129         err = perf_session__process_events(session, &trace->tool);
2130         if (err)
2131                 pr_err("Failed to process events, error %d", err);
2132
2133         else if (trace->summary)
2134                 trace__fprintf_thread_summary(trace, trace->output);
2135
2136 out:
2137         perf_session__delete(session);
2138
2139         return err;
2140 }
2141
/*
 * Print the heading of the per-thread summary to fp.
 * Returns what fprintf() reported for the heading.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2150
2151 static size_t thread__dump_stats(struct thread_trace *ttrace,
2152                                  struct trace *trace, FILE *fp)
2153 {
2154         struct stats *stats;
2155         size_t printed = 0;
2156         struct syscall *sc;
2157         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2158
2159         if (inode == NULL)
2160                 return 0;
2161
2162         printed += fprintf(fp, "\n");
2163
2164         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2165         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2166         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2167
2168         /* each int_node is a syscall */
2169         while (inode) {
2170                 stats = inode->priv;
2171                 if (stats) {
2172                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2173                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2174                         double avg = avg_stats(stats);
2175                         double pct;
2176                         u64 n = (u64) stats->n;
2177
2178                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2179                         avg /= NSEC_PER_MSEC;
2180
2181                         sc = &trace->syscalls.table[inode->i];
2182                         printed += fprintf(fp, "   %-15s", sc->name);
2183                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2184                                            n, min, avg);
2185                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2186                 }
2187
2188                 inode = intlist__next(inode);
2189         }
2190
2191         printed += fprintf(fp, "\n\n");
2192
2193         return printed;
2194 }
2195
/* Context handed to the per-thread summary callback through its priv pointer */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace session being summarized */
        size_t printed;         /* running total of the fprintf() results */
};
2202
2203 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2204 {
2205         struct summary_data *data = priv;
2206         FILE *fp = data->fp;
2207         size_t printed = data->printed;
2208         struct trace *trace = data->trace;
2209         struct thread_trace *ttrace = thread->priv;
2210         double ratio;
2211
2212         if (ttrace == NULL)
2213                 return 0;
2214
2215         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2216
2217         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2218         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2219         printed += fprintf(fp, "%.1f%%", ratio);
2220         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2221         printed += thread__dump_stats(ttrace, trace, fp);
2222
2223         data->printed += printed;
2224
2225         return 0;
2226 }
2227
2228 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2229 {
2230         struct summary_data data = {
2231                 .fp = fp,
2232                 .trace = trace
2233         };
2234         data.printed = trace__fprintf_threads_header(fp);
2235
2236         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2237
2238         return data.printed;
2239 }
2240
2241 static int trace__set_duration(const struct option *opt, const char *str,
2242                                int unset __maybe_unused)
2243 {
2244         struct trace *trace = opt->value;
2245
2246         trace->duration_filter = atof(str);
2247         return 0;
2248 }
2249
2250 static int trace__open_output(struct trace *trace, const char *filename)
2251 {
2252         struct stat st;
2253
2254         if (!stat(filename, &st) && st.st_size) {
2255                 char oldname[PATH_MAX];
2256
2257                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2258                 unlink(oldname);
2259                 rename(filename, oldname);
2260         }
2261
2262         trace->output = fopen(filename, "w");
2263
2264         return trace->output == NULL ? -errno : 0;
2265 }
2266
2267 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2268 {
2269         const char * const trace_usage[] = {
2270                 "perf trace [<options>] [<command>]",
2271                 "perf trace [<options>] -- <command> [<options>]",
2272                 "perf trace record [<options>] [<command>]",
2273                 "perf trace record [<options>] -- <command> [<options>]",
2274                 NULL
2275         };
2276         struct trace trace = {
2277                 .audit = {
2278                         .machine = audit_detect_machine(),
2279                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2280                 },
2281                 .syscalls = {
2282                         . max = -1,
2283                 },
2284                 .opts = {
2285                         .target = {
2286                                 .uid       = UINT_MAX,
2287                                 .uses_mmap = true,
2288                         },
2289                         .user_freq     = UINT_MAX,
2290                         .user_interval = ULLONG_MAX,
2291                         .no_buffering  = true,
2292                         .mmap_pages    = 1024,
2293                 },
2294                 .output = stdout,
2295                 .show_comm = true,
2296         };
2297         const char *output_name = NULL;
2298         const char *ev_qualifier_str = NULL;
2299         const struct option trace_options[] = {
2300         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2301                     "show the thread COMM next to its id"),
2302         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2303         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2304                     "list of events to trace"),
2305         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2306         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2307         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2308                     "trace events on existing process id"),
2309         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2310                     "trace events on existing thread id"),
2311         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2312                     "system-wide collection from all CPUs"),
2313         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2314                     "list of cpus to monitor"),
2315         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2316                     "child tasks do not inherit counters"),
2317         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2318                      "number of mmap data pages",
2319                      perf_evlist__parse_mmap_pages),
2320         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2321                    "user to profile"),
2322         OPT_CALLBACK(0, "duration", &trace, "float",
2323                      "show only events with duration > N.M ms",
2324                      trace__set_duration),
2325         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2326         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2327         OPT_BOOLEAN('T', "time", &trace.full_time,
2328                     "Show full timestamp, not time relative to first start"),
2329         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2330                     "Show only syscall summary with statistics"),
2331         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2332                     "Show all syscalls and summary with statistics"),
2333         OPT_END()
2334         };
2335         int err;
2336         char bf[BUFSIZ];
2337
2338         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2339                 return trace__record(argc-2, &argv[2]);
2340
2341         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2342
2343         /* summary_only implies summary option, but don't overwrite summary if set */
2344         if (trace.summary_only)
2345                 trace.summary = trace.summary_only;
2346
2347         if (output_name != NULL) {
2348                 err = trace__open_output(&trace, output_name);
2349                 if (err < 0) {
2350                         perror("failed to create output file");
2351                         goto out;
2352                 }
2353         }
2354
2355         if (ev_qualifier_str != NULL) {
2356                 const char *s = ev_qualifier_str;
2357
2358                 trace.not_ev_qualifier = *s == '!';
2359                 if (trace.not_ev_qualifier)
2360                         ++s;
2361                 trace.ev_qualifier = strlist__new(true, s);
2362                 if (trace.ev_qualifier == NULL) {
2363                         fputs("Not enough memory to parse event qualifier",
2364                               trace.output);
2365                         err = -ENOMEM;
2366                         goto out_close;
2367                 }
2368         }
2369
2370         err = target__validate(&trace.opts.target);
2371         if (err) {
2372                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2373                 fprintf(trace.output, "%s", bf);
2374                 goto out_close;
2375         }
2376
2377         err = target__parse_uid(&trace.opts.target);
2378         if (err) {
2379                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2380                 fprintf(trace.output, "%s", bf);
2381                 goto out_close;
2382         }
2383
2384         if (!argc && target__none(&trace.opts.target))
2385                 trace.opts.target.system_wide = true;
2386
2387         if (input_name)
2388                 err = trace__replay(&trace);
2389         else
2390                 err = trace__run(&trace, argc, argv);
2391
2392 out_close:
2393         if (output_name != NULL)
2394                 fclose(trace.output);
2395 out:
2396         return err;
2397 }