]> Pileus Git - ~andy/linux/blob - tools/perf/builtin-trace.c
Merge tag 'sound-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai...
[~andy/linux] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallback values for constants that the system headers of older distros
 * may not provide. Each one is only defined when the headers did not.
 */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132         ({ struct syscall_tp *sc = evsel->priv;\
133            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134
/*
 * Look up the tracepoint field called @name on @evsel and bind @field to it
 * as a by-address field.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}

/*
 * Convenience wrapper: initialise the 'name' member of the struct syscall_tp
 * hanging off evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *tp = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &tp->name, #name); })
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the "sys_enter"/"sys_exit" tracepoint named by
 * @direction and prepare it with perf_evsel__init_syscall_tp().
 *
 * The "raw_syscalls" subsystem is tried first, then "syscalls", which is
 * what older kernels (e.g. RHEL6) use. Returns the evsel, or NULL when
 * neither tracepoint could be created or the setup failed.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel;

	evsel = perf_evsel__newtp("raw_syscalls", direction);
	if (!evsel)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (!evsel)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194
/*
 * Read the 'name' member of the evsel's struct syscall_tp from 'sample'
 * through the integer reader installed for it.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *tp = evsel->priv; \
	   tp->name.integer(&tp->name, sample); })

/*
 * Same as above, but through the pointer reader: yields the address of the
 * field inside the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *tp = evsel->priv; \
	   tp->name.pointer(&tp->name, sample); })
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * Table mapping consecutive integer values to names.
 *
 * @offset:     value represented by entries[0].
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};

/* Define strarray__<array> covering the whole of 'array', starting at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, but entries[0] stands for the value 'off' instead of 0. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * String-table beautifier: values without a name in the table are printed
 * in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * String-table beautifier whose fallback for unnamed values is hexadecimal.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301                                         struct syscall_arg *arg);
302
303 #define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319                                               struct syscall_arg *arg);
320
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406                                                       struct syscall_arg *arg)
407 {
408         int behavior = arg->val;
409
410         switch (behavior) {
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412         P_MADV_BHV(NORMAL);
413         P_MADV_BHV(RANDOM);
414         P_MADV_BHV(SEQUENTIAL);
415         P_MADV_BHV(WILLNEED);
416         P_MADV_BHV(DONTNEED);
417         P_MADV_BHV(REMOVE);
418         P_MADV_BHV(DONTFORK);
419         P_MADV_BHV(DOFORK);
420         P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422         P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424         P_MADV_BHV(MERGEABLE);
425         P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427         P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430         P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433         P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436         P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439         default: break;
440         }
441
442         return scnprintf(bf, size, "%#x", behavior);
443 }
444
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448                                            struct syscall_arg *arg)
449 {
450         int printed = 0, op = arg->val;
451
452         if (op == 0)
453                 return scnprintf(bf, size, "NONE");
454 #define P_CMD(cmd) \
455         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457                 op &= ~LOCK_##cmd; \
458         }
459
460         P_CMD(SH);
461         P_CMD(EX);
462         P_CMD(NB);
463         P_CMD(UN);
464         P_CMD(MAND);
465         P_CMD(RW);
466         P_CMD(READ);
467         P_CMD(WRITE);
468 #undef P_OP
469
470         if (op)
471                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472
473         return printed;
474 }
475
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480         enum syscall_futex_args {
481                 SCF_UADDR   = (1 << 0),
482                 SCF_OP      = (1 << 1),
483                 SCF_VAL     = (1 << 2),
484                 SCF_TIMEOUT = (1 << 3),
485                 SCF_UADDR2  = (1 << 4),
486                 SCF_VAL3    = (1 << 5),
487         };
488         int op = arg->val;
489         int cmd = op & FUTEX_CMD_MASK;
490         size_t printed = 0;
491
492         switch (cmd) {
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
495         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
498         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
499         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
500         P_FUTEX_OP(WAKE_OP);                                                      break;
501         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
504         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
505         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
506         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
507         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
508         }
509
510         if (op & FUTEX_PRIVATE_FLAG)
511                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
512
513         if (op & FUTEX_CLOCK_REALTIME)
514                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515
516         return printed;
517 }
518
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
523
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
526
527 static const char *whences[] = { "SET", "CUR", "END",
528 #ifdef SEEK_DATA
529 "DATA",
530 #endif
531 #ifdef SEEK_HOLE
532 "HOLE",
533 #endif
534 };
535 static DEFINE_STRARRAY(whences);
536
537 static const char *fcntl_cmds[] = {
538         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
541         "F_GETOWNER_UIDS",
542 };
543 static DEFINE_STRARRAY(fcntl_cmds);
544
545 static const char *rlimit_resources[] = {
546         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
548         "RTTIME",
549 };
550 static DEFINE_STRARRAY(rlimit_resources);
551
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
554
555 static const char *clockid[] = {
556         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
558 };
559 static DEFINE_STRARRAY(clockid);
560
561 static const char *socket_families[] = {
562         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
563         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
564         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
565         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
566         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
567         "ALG", "NFC", "VSOCK",
568 };
569 static DEFINE_STRARRAY(socket_families);
570
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576                                                       struct syscall_arg *arg)
577 {
578         size_t printed;
579         int type = arg->val,
580             flags = type & ~SOCK_TYPE_MASK;
581
582         type &= SOCK_TYPE_MASK;
583         /*
584          * Can't use a strarray, MIPS may override for ABI reasons.
585          */
586         switch (type) {
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588         P_SK_TYPE(STREAM);
589         P_SK_TYPE(DGRAM);
590         P_SK_TYPE(RAW);
591         P_SK_TYPE(RDM);
592         P_SK_TYPE(SEQPACKET);
593         P_SK_TYPE(DCCP);
594         P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596         default:
597                 printed = scnprintf(bf, size, "%#x", type);
598         }
599
600 #define P_SK_FLAG(n) \
601         if (flags & SOCK_##n) { \
602                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603                 flags &= ~SOCK_##n; \
604         }
605
606         P_SK_FLAG(CLOEXEC);
607         P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609
610         if (flags)
611                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612
613         return printed;
614 }
615
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617
618 #ifndef MSG_PROBE
619 #define MSG_PROBE            0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE  0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN         0x20000000
629 #endif
630
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632                                                struct syscall_arg *arg)
633 {
634         int printed = 0, flags = arg->val;
635
636         if (flags == 0)
637                 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639         if (flags & MSG_##n) { \
640                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641                 flags &= ~MSG_##n; \
642         }
643
644         P_MSG_FLAG(OOB);
645         P_MSG_FLAG(PEEK);
646         P_MSG_FLAG(DONTROUTE);
647         P_MSG_FLAG(TRYHARD);
648         P_MSG_FLAG(CTRUNC);
649         P_MSG_FLAG(PROBE);
650         P_MSG_FLAG(TRUNC);
651         P_MSG_FLAG(DONTWAIT);
652         P_MSG_FLAG(EOR);
653         P_MSG_FLAG(WAITALL);
654         P_MSG_FLAG(FIN);
655         P_MSG_FLAG(SYN);
656         P_MSG_FLAG(CONFIRM);
657         P_MSG_FLAG(RST);
658         P_MSG_FLAG(ERRQUEUE);
659         P_MSG_FLAG(NOSIGNAL);
660         P_MSG_FLAG(MORE);
661         P_MSG_FLAG(WAITFORONE);
662         P_MSG_FLAG(SENDPAGE_NOTLAST);
663         P_MSG_FLAG(FASTOPEN);
664         P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666
667         if (flags)
668                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669
670         return printed;
671 }
672
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676                                                  struct syscall_arg *arg)
677 {
678         size_t printed = 0;
679         int mode = arg->val;
680
681         if (mode == F_OK) /* 0 */
682                 return scnprintf(bf, size, "F");
683 #define P_MODE(n) \
684         if (mode & n##_OK) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686                 mode &= ~n##_OK; \
687         }
688
689         P_MODE(R);
690         P_MODE(W);
691         P_MODE(X);
692 #undef P_MODE
693
694         if (mode)
695                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696
697         return printed;
698 }
699
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703                                                struct syscall_arg *arg)
704 {
705         int printed = 0, flags = arg->val;
706
707         if (!(flags & O_CREAT))
708                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709
710         if (flags == 0)
711                 return scnprintf(bf, size, "RDONLY");
712 #define P_FLAG(n) \
713         if (flags & O_##n) { \
714                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715                 flags &= ~O_##n; \
716         }
717
718         P_FLAG(APPEND);
719         P_FLAG(ASYNC);
720         P_FLAG(CLOEXEC);
721         P_FLAG(CREAT);
722         P_FLAG(DIRECT);
723         P_FLAG(DIRECTORY);
724         P_FLAG(EXCL);
725         P_FLAG(LARGEFILE);
726         P_FLAG(NOATIME);
727         P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729         P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731         P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734         P_FLAG(PATH);
735 #endif
736         P_FLAG(RDWR);
737 #ifdef O_DSYNC
738         if ((flags & O_SYNC) == O_SYNC)
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740         else {
741                 P_FLAG(DSYNC);
742         }
743 #else
744         P_FLAG(SYNC);
745 #endif
746         P_FLAG(TRUNC);
747         P_FLAG(WRONLY);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759                                                    struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763         if (flags == 0)
764                 return scnprintf(bf, size, "NONE");
765 #define P_FLAG(n) \
766         if (flags & EFD_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~EFD_##n; \
769         }
770
771         P_FLAG(SEMAPHORE);
772         P_FLAG(CLOEXEC);
773         P_FLAG(NONBLOCK);
774 #undef P_FLAG
775
776         if (flags)
777                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778
779         return printed;
780 }
781
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785                                                 struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789 #define P_FLAG(n) \
790         if (flags & O_##n) { \
791                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792                 flags &= ~O_##n; \
793         }
794
795         P_FLAG(CLOEXEC);
796         P_FLAG(NONBLOCK);
797 #undef P_FLAG
798
799         if (flags)
800                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801
802         return printed;
803 }
804
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809         int sig = arg->val;
810
811         switch (sig) {
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813         P_SIGNUM(HUP);
814         P_SIGNUM(INT);
815         P_SIGNUM(QUIT);
816         P_SIGNUM(ILL);
817         P_SIGNUM(TRAP);
818         P_SIGNUM(ABRT);
819         P_SIGNUM(BUS);
820         P_SIGNUM(FPE);
821         P_SIGNUM(KILL);
822         P_SIGNUM(USR1);
823         P_SIGNUM(SEGV);
824         P_SIGNUM(USR2);
825         P_SIGNUM(PIPE);
826         P_SIGNUM(ALRM);
827         P_SIGNUM(TERM);
828         P_SIGNUM(STKFLT);
829         P_SIGNUM(CHLD);
830         P_SIGNUM(CONT);
831         P_SIGNUM(STOP);
832         P_SIGNUM(TSTP);
833         P_SIGNUM(TTIN);
834         P_SIGNUM(TTOU);
835         P_SIGNUM(URG);
836         P_SIGNUM(XCPU);
837         P_SIGNUM(XFSZ);
838         P_SIGNUM(VTALRM);
839         P_SIGNUM(PROF);
840         P_SIGNUM(WINCH);
841         P_SIGNUM(IO);
842         P_SIGNUM(PWR);
843         P_SIGNUM(SYS);
844         default: break;
845         }
846
847         return scnprintf(bf, size, "%#x", sig);
848 }
849
850 #define SCA_SIGNUM syscall_arg__scnprintf_signum
851
852 #if defined(__i386__) || defined(__x86_64__)
853 /*
854  * FIXME: Make this available to all arches.
855  */
856 #define TCGETS          0x5401
857
858 static const char *tioctls[] = {
859         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
860         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
861         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
862         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
863         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
864         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
865         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
866         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
867         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
868         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
869         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
870         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
871         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
872         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
873         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
874 };
875
876 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
877 #endif /* defined(__i386__) || defined(__x86_64__) */
878
/*
 * Initializer fragment for a syscall_fmt entry: argument number 'arg' is
 * formatted by SCA_STRARRAY using the table strarray__<array>.
 * 'name' is not expanded; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
882
883 static struct syscall_fmt {
884         const char *name;
885         const char *alias;
886         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
887         void       *arg_parm[6];
888         bool       errmsg;
889         bool       timeout;
890         bool       hexret;
891 } syscall_fmts[] = {
892         { .name     = "access",     .errmsg = true,
893           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
894         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
895         { .name     = "brk",        .hexret = true,
896           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
897         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
898         { .name     = "close",      .errmsg = true,
899           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
900         { .name     = "connect",    .errmsg = true, },
901         { .name     = "dup",        .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "dup2",       .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "dup3",       .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
907         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
908         { .name     = "eventfd2",   .errmsg = true,
909           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
910         { .name     = "faccessat",  .errmsg = true,
911           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
912         { .name     = "fadvise64",  .errmsg = true,
913           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
914         { .name     = "fallocate",  .errmsg = true,
915           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
916         { .name     = "fchdir",     .errmsg = true,
917           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
918         { .name     = "fchmod",     .errmsg = true,
919           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
920         { .name     = "fchmodat",   .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
922         { .name     = "fchown",     .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fchownat",   .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
926         { .name     = "fcntl",      .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */
928                              [1] = SCA_STRARRAY, /* cmd */ },
929           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
930         { .name     = "fdatasync",  .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "flock",      .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FD, /* fd */
934                              [1] = SCA_FLOCK, /* cmd */ }, },
935         { .name     = "fsetxattr",  .errmsg = true,
936           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
937         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
938           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
939         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
940           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
941         { .name     = "fstatfs",    .errmsg = true,
942           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
943         { .name     = "fsync",    .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
945         { .name     = "ftruncate", .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
947         { .name     = "futex",      .errmsg = true,
948           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
949         { .name     = "futimesat", .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
951         { .name     = "getdents",   .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
953         { .name     = "getdents64", .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
955         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
956         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
957         { .name     = "ioctl",      .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
959 #if defined(__i386__) || defined(__x86_64__)
960 /*
961  * FIXME: Make this available to all arches.
962  */
963                              [1] = SCA_STRHEXARRAY, /* cmd */
964                              [2] = SCA_HEX, /* arg */ },
965           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
966 #else
967                              [2] = SCA_HEX, /* arg */ }, },
968 #endif
969         { .name     = "kill",       .errmsg = true,
970           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
971         { .name     = "linkat",     .errmsg = true,
972           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
973         { .name     = "lseek",      .errmsg = true,
974           .arg_scnprintf = { [0] = SCA_FD, /* fd */
975                              [2] = SCA_STRARRAY, /* whence */ },
976           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
977         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
978         { .name     = "madvise",    .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
980                              [2] = SCA_MADV_BHV, /* behavior */ }, },
981         { .name     = "mkdirat",    .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
983         { .name     = "mknodat",    .errmsg = true,
984           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
985         { .name     = "mlock",      .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
987         { .name     = "mlockall",   .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
989         { .name     = "mmap",       .hexret = true,
990           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
991                              [2] = SCA_MMAP_PROT, /* prot */
992                              [3] = SCA_MMAP_FLAGS, /* flags */
993                              [4] = SCA_FD,        /* fd */ }, },
994         { .name     = "mprotect",   .errmsg = true,
995           .arg_scnprintf = { [0] = SCA_HEX, /* start */
996                              [2] = SCA_MMAP_PROT, /* prot */ }, },
997         { .name     = "mremap",     .hexret = true,
998           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
999                              [4] = SCA_HEX, /* new_addr */ }, },
1000         { .name     = "munlock",    .errmsg = true,
1001           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1002         { .name     = "munmap",     .errmsg = true,
1003           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1004         { .name     = "name_to_handle_at", .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1006         { .name     = "newfstatat", .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1008         { .name     = "open",       .errmsg = true,
1009           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1010         { .name     = "open_by_handle_at", .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1012                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1013         { .name     = "openat",     .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1015                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1016         { .name     = "pipe2",      .errmsg = true,
1017           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1018         { .name     = "poll",       .errmsg = true, .timeout = true, },
1019         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1020         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1021           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1022         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1023           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1024         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1025         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1026           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1027         { .name     = "pwritev",    .errmsg = true,
1028           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1029         { .name     = "read",       .errmsg = true,
1030           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1031         { .name     = "readlinkat", .errmsg = true,
1032           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1033         { .name     = "readv",      .errmsg = true,
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035         { .name     = "recvfrom",   .errmsg = true,
1036           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1037         { .name     = "recvmmsg",   .errmsg = true,
1038           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1039         { .name     = "recvmsg",    .errmsg = true,
1040           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1041         { .name     = "renameat",   .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1043         { .name     = "rt_sigaction", .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1045         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1046         { .name     = "rt_sigqueueinfo", .errmsg = true,
1047           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1048         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1049           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1050         { .name     = "select",     .errmsg = true, .timeout = true, },
1051         { .name     = "sendmmsg",    .errmsg = true,
1052           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1053         { .name     = "sendmsg",    .errmsg = true,
1054           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1055         { .name     = "sendto",     .errmsg = true,
1056           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1057         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1058         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1059         { .name     = "shutdown",   .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1061         { .name     = "socket",     .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1063                              [1] = SCA_SK_TYPE, /* type */ },
1064           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1065         { .name     = "socketpair", .errmsg = true,
1066           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1067                              [1] = SCA_SK_TYPE, /* type */ },
1068           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1069         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1070         { .name     = "symlinkat",  .errmsg = true,
1071           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1072         { .name     = "tgkill",     .errmsg = true,
1073           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1074         { .name     = "tkill",      .errmsg = true,
1075           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1076         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1077         { .name     = "unlinkat",   .errmsg = true,
1078           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1079         { .name     = "utimensat",  .errmsg = true,
1080           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1081         { .name     = "write",      .errmsg = true,
1082           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1083         { .name     = "writev",     .errmsg = true,
1084           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1085 };
1086
1087 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1088 {
1089         const struct syscall_fmt *fmt = fmtp;
1090         return strcmp(name, fmt->name);
1091 }
1092
1093 static struct syscall_fmt *syscall_fmt__find(const char *name)
1094 {
1095         const int nmemb = ARRAY_SIZE(syscall_fmts);
1096         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1097 }
1098
/*
 * Per-syscall state cached in trace->syscalls.table, indexed by syscall id.
 */
struct syscall {
        /* sys_enter_<name> tracepoint format, NULL if it was not looked up */
        struct event_format *tp_format;
        /* syscall name; a NULL name marks a table slot not yet read */
        const char          *name;
        /* true when the event qualifier excludes this syscall */
        bool                filtered;
        /* matching syscall_fmts[] entry, NULL when there is none */
        struct syscall_fmt  *fmt;
        /* per-argument pretty printers, NULL slots fall back to "%ld" */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument printer parameters, taken from fmt->arg_parm */
        void                **arg_parm;
};
1107
1108 static size_t fprintf_duration(unsigned long t, FILE *fp)
1109 {
1110         double duration = (double)t / NSEC_PER_MSEC;
1111         size_t printed = fprintf(fp, "(");
1112
1113         if (duration >= 1.0)
1114                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1115         else if (duration >= 0.01)
1116                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1117         else
1118                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1119         return printed + fprintf(fp, "): ");
1120 }
1121
1122 struct thread_trace {
1123         u64               entry_time;
1124         u64               exit_time;
1125         bool              entry_pending;
1126         unsigned long     nr_events;
1127         char              *entry_str;
1128         double            runtime_ms;
1129         struct {
1130                 int       max;
1131                 char      **table;
1132         } paths;
1133
1134         struct intlist *syscall_stats;
1135 };
1136
1137 static struct thread_trace *thread_trace__new(void)
1138 {
1139         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1140
1141         if (ttrace)
1142                 ttrace->paths.max = -1;
1143
1144         ttrace->syscall_stats = intlist__new(NULL);
1145
1146         return ttrace;
1147 }
1148
1149 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1150 {
1151         struct thread_trace *ttrace;
1152
1153         if (thread == NULL)
1154                 goto fail;
1155
1156         if (thread->priv == NULL)
1157                 thread->priv = thread_trace__new();
1158                 
1159         if (thread->priv == NULL)
1160                 goto fail;
1161
1162         ttrace = thread->priv;
1163         ++ttrace->nr_events;
1164
1165         return ttrace;
1166 fail:
1167         color_fprintf(fp, PERF_COLOR_RED,
1168                       "WARNING: not enough memory, dropping samples!\n");
1169         return NULL;
1170 }
1171
1172 struct trace {
1173         struct perf_tool        tool;
1174         struct {
1175                 int             machine;
1176                 int             open_id;
1177         }                       audit;
1178         struct {
1179                 int             max;
1180                 struct syscall  *table;
1181         } syscalls;
1182         struct record_opts      opts;
1183         struct machine          *host;
1184         u64                     base_time;
1185         FILE                    *output;
1186         unsigned long           nr_events;
1187         struct strlist          *ev_qualifier;
1188         const char              *last_vfs_getname;
1189         struct intlist          *tid_list;
1190         struct intlist          *pid_list;
1191         double                  duration_filter;
1192         double                  runtime_ms;
1193         struct {
1194                 u64             vfs_getname,
1195                                 proc_getname;
1196         } stats;
1197         bool                    not_ev_qualifier;
1198         bool                    live;
1199         bool                    full_time;
1200         bool                    sched;
1201         bool                    multiple_threads;
1202         bool                    summary;
1203         bool                    summary_only;
1204         bool                    show_comm;
1205         bool                    show_tool_stats;
1206 };
1207
1208 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1209 {
1210         struct thread_trace *ttrace = thread->priv;
1211
1212         if (fd > ttrace->paths.max) {
1213                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1214
1215                 if (npath == NULL)
1216                         return -1;
1217
1218                 if (ttrace->paths.max != -1) {
1219                         memset(npath + ttrace->paths.max + 1, 0,
1220                                (fd - ttrace->paths.max) * sizeof(char *));
1221                 } else {
1222                         memset(npath, 0, (fd + 1) * sizeof(char *));
1223                 }
1224
1225                 ttrace->paths.table = npath;
1226                 ttrace->paths.max   = fd;
1227         }
1228
1229         ttrace->paths.table[fd] = strdup(pathname);
1230
1231         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1232 }
1233
1234 static int thread__read_fd_path(struct thread *thread, int fd)
1235 {
1236         char linkname[PATH_MAX], pathname[PATH_MAX];
1237         struct stat st;
1238         int ret;
1239
1240         if (thread->pid_ == thread->tid) {
1241                 scnprintf(linkname, sizeof(linkname),
1242                           "/proc/%d/fd/%d", thread->pid_, fd);
1243         } else {
1244                 scnprintf(linkname, sizeof(linkname),
1245                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1246         }
1247
1248         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1249                 return -1;
1250
1251         ret = readlink(linkname, pathname, sizeof(pathname));
1252
1253         if (ret < 0 || ret > st.st_size)
1254                 return -1;
1255
1256         pathname[ret] = '\0';
1257         return trace__set_fd_pathname(thread, fd, pathname);
1258 }
1259
1260 static const char *thread__fd_path(struct thread *thread, int fd,
1261                                    struct trace *trace)
1262 {
1263         struct thread_trace *ttrace = thread->priv;
1264
1265         if (ttrace == NULL)
1266                 return NULL;
1267
1268         if (fd < 0)
1269                 return NULL;
1270
1271         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1272                 if (!trace->live)
1273                         return NULL;
1274                 ++trace->stats.proc_getname;
1275                 if (thread__read_fd_path(thread, fd)) {
1276                         return NULL;
1277         }
1278
1279         return ttrace->paths.table[fd];
1280 }
1281
1282 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1283                                         struct syscall_arg *arg)
1284 {
1285         int fd = arg->val;
1286         size_t printed = scnprintf(bf, size, "%d", fd);
1287         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1288
1289         if (path)
1290                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1291
1292         return printed;
1293 }
1294
1295 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1296                                               struct syscall_arg *arg)
1297 {
1298         int fd = arg->val;
1299         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1300         struct thread_trace *ttrace = arg->thread->priv;
1301
1302         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1303                 zfree(&ttrace->paths.table[fd]);
1304
1305         return printed;
1306 }
1307
1308 static bool trace__filter_duration(struct trace *trace, double t)
1309 {
1310         return t < (trace->duration_filter * NSEC_PER_MSEC);
1311 }
1312
1313 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1314 {
1315         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1316
1317         return fprintf(fp, "%10.3f ", ts);
1318 }
1319
/*
 * Flags shared between the signal handler and the rest of the program.
 * They are written asynchronously, so they are volatile sig_atomic_t:
 * plain objects modified from a handler need not be observed by the
 * interrupted code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the signal that
 * triggered it was SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1328
1329 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1330                                         u64 duration, u64 tstamp, FILE *fp)
1331 {
1332         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1333         printed += fprintf_duration(duration, fp);
1334
1335         if (trace->multiple_threads) {
1336                 if (trace->show_comm)
1337                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1338                 printed += fprintf(fp, "%d ", thread->tid);
1339         }
1340
1341         return printed;
1342 }
1343
1344 static int trace__process_event(struct trace *trace, struct machine *machine,
1345                                 union perf_event *event, struct perf_sample *sample)
1346 {
1347         int ret = 0;
1348
1349         switch (event->header.type) {
1350         case PERF_RECORD_LOST:
1351                 color_fprintf(trace->output, PERF_COLOR_RED,
1352                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1353                 ret = machine__process_lost_event(machine, event, sample);
1354         default:
1355                 ret = machine__process_event(machine, event, sample);
1356                 break;
1357         }
1358
1359         return ret;
1360 }
1361
1362 static int trace__tool_process(struct perf_tool *tool,
1363                                union perf_event *event,
1364                                struct perf_sample *sample,
1365                                struct machine *machine)
1366 {
1367         struct trace *trace = container_of(tool, struct trace, tool);
1368         return trace__process_event(trace, machine, event, sample);
1369 }
1370
1371 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1372 {
1373         int err = symbol__init();
1374
1375         if (err)
1376                 return err;
1377
1378         trace->host = machine__new_host();
1379         if (trace->host == NULL)
1380                 return -ENOMEM;
1381
1382         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1383                                             evlist->threads, trace__tool_process, false);
1384         if (err)
1385                 symbol__exit();
1386
1387         return err;
1388 }
1389
1390 static int syscall__set_arg_fmts(struct syscall *sc)
1391 {
1392         struct format_field *field;
1393         int idx = 0;
1394
1395         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1396         if (sc->arg_scnprintf == NULL)
1397                 return -1;
1398
1399         if (sc->fmt)
1400                 sc->arg_parm = sc->fmt->arg_parm;
1401
1402         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1403                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1404                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1405                 else if (field->flags & FIELD_IS_POINTER)
1406                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1407                 ++idx;
1408         }
1409
1410         return 0;
1411 }
1412
1413 static int trace__read_syscall_info(struct trace *trace, int id)
1414 {
1415         char tp_name[128];
1416         struct syscall *sc;
1417         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1418
1419         if (name == NULL)
1420                 return -1;
1421
1422         if (id > trace->syscalls.max) {
1423                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1424
1425                 if (nsyscalls == NULL)
1426                         return -1;
1427
1428                 if (trace->syscalls.max != -1) {
1429                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1430                                (id - trace->syscalls.max) * sizeof(*sc));
1431                 } else {
1432                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1433                 }
1434
1435                 trace->syscalls.table = nsyscalls;
1436                 trace->syscalls.max   = id;
1437         }
1438
1439         sc = trace->syscalls.table + id;
1440         sc->name = name;
1441
1442         if (trace->ev_qualifier) {
1443                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1444
1445                 if (!(in ^ trace->not_ev_qualifier)) {
1446                         sc->filtered = true;
1447                         /*
1448                          * No need to do read tracepoint information since this will be
1449                          * filtered out.
1450                          */
1451                         return 0;
1452                 }
1453         }
1454
1455         sc->fmt  = syscall_fmt__find(sc->name);
1456
1457         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1458         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1459
1460         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1461                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1462                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1463         }
1464
1465         if (sc->tp_format == NULL)
1466                 return -1;
1467
1468         return syscall__set_arg_fmts(sc);
1469 }
1470
1471 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1472                                       unsigned long *args, struct trace *trace,
1473                                       struct thread *thread)
1474 {
1475         size_t printed = 0;
1476
1477         if (sc->tp_format != NULL) {
1478                 struct format_field *field;
1479                 u8 bit = 1;
1480                 struct syscall_arg arg = {
1481                         .idx    = 0,
1482                         .mask   = 0,
1483                         .trace  = trace,
1484                         .thread = thread,
1485                 };
1486
1487                 for (field = sc->tp_format->format.fields->next; field;
1488                      field = field->next, ++arg.idx, bit <<= 1) {
1489                         if (arg.mask & bit)
1490                                 continue;
1491                         /*
1492                          * Suppress this argument if its value is zero and
1493                          * and we don't have a string associated in an
1494                          * strarray for it.
1495                          */
1496                         if (args[arg.idx] == 0 &&
1497                             !(sc->arg_scnprintf &&
1498                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1499                               sc->arg_parm[arg.idx]))
1500                                 continue;
1501
1502                         printed += scnprintf(bf + printed, size - printed,
1503                                              "%s%s: ", printed ? ", " : "", field->name);
1504                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1505                                 arg.val = args[arg.idx];
1506                                 if (sc->arg_parm)
1507                                         arg.parm = sc->arg_parm[arg.idx];
1508                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1509                                                                       size - printed, &arg);
1510                         } else {
1511                                 printed += scnprintf(bf + printed, size - printed,
1512                                                      "%ld", args[arg.idx]);
1513                         }
1514                 }
1515         } else {
1516                 int i = 0;
1517
1518                 while (i < 6) {
1519                         printed += scnprintf(bf + printed, size - printed,
1520                                              "%sarg%d: %ld",
1521                                              printed ? ", " : "", i, args[i]);
1522                         ++i;
1523                 }
1524         }
1525
1526         return printed;
1527 }
1528
/*
 * Signature shared by the per-tracepoint sample handlers in this file,
 * such as trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1531
1532 static struct syscall *trace__syscall_info(struct trace *trace,
1533                                            struct perf_evsel *evsel, int id)
1534 {
1535
1536         if (id < 0) {
1537
1538                 /*
1539                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1540                  * before that, leaving at a higher verbosity level till that is
1541                  * explained. Reproduced with plain ftrace with:
1542                  *
1543                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1544                  * grep "NR -1 " /t/trace_pipe
1545                  *
1546                  * After generating some load on the machine.
1547                  */
1548                 if (verbose > 1) {
1549                         static u64 n;
1550                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1551                                 id, perf_evsel__name(evsel), ++n);
1552                 }
1553                 return NULL;
1554         }
1555
1556         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1557             trace__read_syscall_info(trace, id))
1558                 goto out_cant_read;
1559
1560         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1561                 goto out_cant_read;
1562
1563         return &trace->syscalls.table[id];
1564
1565 out_cant_read:
1566         if (verbose) {
1567                 fprintf(trace->output, "Problems reading syscall %d", id);
1568                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1569                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1570                 fputs(" information\n", trace->output);
1571         }
1572         return NULL;
1573 }
1574
1575 static void thread__update_stats(struct thread_trace *ttrace,
1576                                  int id, struct perf_sample *sample)
1577 {
1578         struct int_node *inode;
1579         struct stats *stats;
1580         u64 duration = 0;
1581
1582         inode = intlist__findnew(ttrace->syscall_stats, id);
1583         if (inode == NULL)
1584                 return;
1585
1586         stats = inode->priv;
1587         if (stats == NULL) {
1588                 stats = malloc(sizeof(struct stats));
1589                 if (stats == NULL)
1590                         return;
1591                 init_stats(stats);
1592                 inode->priv = stats;
1593         }
1594
1595         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1596                 duration = sample->time - ttrace->entry_time;
1597
1598         update_stats(stats, duration);
1599 }
1600
1601 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1602                             struct perf_sample *sample)
1603 {
1604         char *msg;
1605         void *args;
1606         size_t printed = 0;
1607         struct thread *thread;
1608         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1609         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1610         struct thread_trace *ttrace;
1611
1612         if (sc == NULL)
1613                 return -1;
1614
1615         if (sc->filtered)
1616                 return 0;
1617
1618         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1619         ttrace = thread__trace(thread, trace->output);
1620         if (ttrace == NULL)
1621                 return -1;
1622
1623         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1624         ttrace = thread->priv;
1625
1626         if (ttrace->entry_str == NULL) {
1627                 ttrace->entry_str = malloc(1024);
1628                 if (!ttrace->entry_str)
1629                         return -1;
1630         }
1631
1632         ttrace->entry_time = sample->time;
1633         msg = ttrace->entry_str;
1634         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1635
1636         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1637                                            args, trace, thread);
1638
1639         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1640                 if (!trace->duration_filter && !trace->summary_only) {
1641                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1642                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1643                 }
1644         } else
1645                 ttrace->entry_pending = true;
1646
1647         return 0;
1648 }
1649
1650 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1651                            struct perf_sample *sample)
1652 {
1653         int ret;
1654         u64 duration = 0;
1655         struct thread *thread;
1656         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1657         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1658         struct thread_trace *ttrace;
1659
1660         if (sc == NULL)
1661                 return -1;
1662
1663         if (sc->filtered)
1664                 return 0;
1665
1666         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1667         ttrace = thread__trace(thread, trace->output);
1668         if (ttrace == NULL)
1669                 return -1;
1670
1671         if (trace->summary)
1672                 thread__update_stats(ttrace, id, sample);
1673
1674         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1675
1676         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1677                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1678                 trace->last_vfs_getname = NULL;
1679                 ++trace->stats.vfs_getname;
1680         }
1681
1682         ttrace = thread->priv;
1683
1684         ttrace->exit_time = sample->time;
1685
1686         if (ttrace->entry_time) {
1687                 duration = sample->time - ttrace->entry_time;
1688                 if (trace__filter_duration(trace, duration))
1689                         goto out;
1690         } else if (trace->duration_filter)
1691                 goto out;
1692
1693         if (trace->summary_only)
1694                 goto out;
1695
1696         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1697
1698         if (ttrace->entry_pending) {
1699                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1700         } else {
1701                 fprintf(trace->output, " ... [");
1702                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1703                 fprintf(trace->output, "]: %s()", sc->name);
1704         }
1705
1706         if (sc->fmt == NULL) {
1707 signed_print:
1708                 fprintf(trace->output, ") = %d", ret);
1709         } else if (ret < 0 && sc->fmt->errmsg) {
1710                 char bf[256];
1711                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1712                            *e = audit_errno_to_name(-ret);
1713
1714                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1715         } else if (ret == 0 && sc->fmt->timeout)
1716                 fprintf(trace->output, ") = 0 Timeout");
1717         else if (sc->fmt->hexret)
1718                 fprintf(trace->output, ") = %#x", ret);
1719         else
1720                 goto signed_print;
1721
1722         fputc('\n', trace->output);
1723 out:
1724         ttrace->entry_pending = false;
1725
1726         return 0;
1727 }
1728
1729 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1730                               struct perf_sample *sample)
1731 {
1732         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1733         return 0;
1734 }
1735
1736 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1737                                      struct perf_sample *sample)
1738 {
1739         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1740         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1741         struct thread *thread = machine__findnew_thread(trace->host,
1742                                                         sample->pid,
1743                                                         sample->tid);
1744         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1745
1746         if (ttrace == NULL)
1747                 goto out_dump;
1748
1749         ttrace->runtime_ms += runtime_ms;
1750         trace->runtime_ms += runtime_ms;
1751         return 0;
1752
1753 out_dump:
1754         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1755                evsel->name,
1756                perf_evsel__strval(evsel, sample, "comm"),
1757                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1758                runtime,
1759                perf_evsel__intval(evsel, sample, "vruntime"));
1760         return 0;
1761 }
1762
1763 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1764 {
1765         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1766             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1767                 return false;
1768
1769         if (trace->pid_list || trace->tid_list)
1770                 return true;
1771
1772         return false;
1773 }
1774
1775 static int trace__process_sample(struct perf_tool *tool,
1776                                  union perf_event *event __maybe_unused,
1777                                  struct perf_sample *sample,
1778                                  struct perf_evsel *evsel,
1779                                  struct machine *machine __maybe_unused)
1780 {
1781         struct trace *trace = container_of(tool, struct trace, tool);
1782         int err = 0;
1783
1784         tracepoint_handler handler = evsel->handler;
1785
1786         if (skip_sample(trace, sample))
1787                 return 0;
1788
1789         if (!trace->full_time && trace->base_time == 0)
1790                 trace->base_time = sample->time;
1791
1792         if (handler) {
1793                 ++trace->nr_events;
1794                 handler(trace, evsel, sample);
1795         }
1796
1797         return err;
1798 }
1799
1800 static int parse_target_str(struct trace *trace)
1801 {
1802         if (trace->opts.target.pid) {
1803                 trace->pid_list = intlist__new(trace->opts.target.pid);
1804                 if (trace->pid_list == NULL) {
1805                         pr_err("Error parsing process id string\n");
1806                         return -EINVAL;
1807                 }
1808         }
1809
1810         if (trace->opts.target.tid) {
1811                 trace->tid_list = intlist__new(trace->opts.target.tid);
1812                 if (trace->tid_list == NULL) {
1813                         pr_err("Error parsing thread id string\n");
1814                         return -EINVAL;
1815                 }
1816         }
1817
1818         return 0;
1819 }
1820
/*
 * Implement "perf trace record": build an argument vector for cmd_record()
 * made of a fixed set of record options, the syscall tracepoint event string
 * and the arguments given by the user, then run it.
 *
 * Returns the result of cmd_record(), -ENOMEM when the argument vector cannot
 * be allocated, or -1 when neither syscall tracepoint flavour is available.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* Do not leak the argument vector on this error path. */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1859
1860 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1861
1862 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1863 {
1864         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1865         if (evsel == NULL)
1866                 return;
1867
1868         if (perf_evsel__field(evsel, "pathname") == NULL) {
1869                 perf_evsel__delete(evsel);
1870                 return;
1871         }
1872
1873         evsel->handler = trace__vfs_getname;
1874         perf_evlist__add(evlist, evsel);
1875 }
1876
1877 static int trace__run(struct trace *trace, int argc, const char **argv)
1878 {
1879         struct perf_evlist *evlist = perf_evlist__new();
1880         struct perf_evsel *evsel;
1881         int err = -1, i;
1882         unsigned long before;
1883         const bool forks = argc > 0;
1884
1885         trace->live = true;
1886
1887         if (evlist == NULL) {
1888                 fprintf(trace->output, "Not enough memory to run!\n");
1889                 goto out;
1890         }
1891
1892         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1893                 goto out_error_tp;
1894
1895         perf_evlist__add_vfs_getname(evlist);
1896
1897         if (trace->sched &&
1898                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1899                                 trace__sched_stat_runtime))
1900                 goto out_error_tp;
1901
1902         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1903         if (err < 0) {
1904                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1905                 goto out_delete_evlist;
1906         }
1907
1908         err = trace__symbols_init(trace, evlist);
1909         if (err < 0) {
1910                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1911                 goto out_delete_evlist;
1912         }
1913
1914         perf_evlist__config(evlist, &trace->opts);
1915
1916         signal(SIGCHLD, sig_handler);
1917         signal(SIGINT, sig_handler);
1918
1919         if (forks) {
1920                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1921                                                     argv, false, NULL);
1922                 if (err < 0) {
1923                         fprintf(trace->output, "Couldn't run the workload!\n");
1924                         goto out_delete_evlist;
1925                 }
1926         }
1927
1928         err = perf_evlist__open(evlist);
1929         if (err < 0)
1930                 goto out_error_open;
1931
1932         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1933         if (err < 0) {
1934                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1935                 goto out_delete_evlist;
1936         }
1937
1938         perf_evlist__enable(evlist);
1939
1940         if (forks)
1941                 perf_evlist__start_workload(evlist);
1942
1943         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1944 again:
1945         before = trace->nr_events;
1946
1947         for (i = 0; i < evlist->nr_mmaps; i++) {
1948                 union perf_event *event;
1949
1950                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1951                         const u32 type = event->header.type;
1952                         tracepoint_handler handler;
1953                         struct perf_sample sample;
1954
1955                         ++trace->nr_events;
1956
1957                         err = perf_evlist__parse_sample(evlist, event, &sample);
1958                         if (err) {
1959                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1960                                 goto next_event;
1961                         }
1962
1963                         if (!trace->full_time && trace->base_time == 0)
1964                                 trace->base_time = sample.time;
1965
1966                         if (type != PERF_RECORD_SAMPLE) {
1967                                 trace__process_event(trace, trace->host, event, &sample);
1968                                 continue;
1969                         }
1970
1971                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1972                         if (evsel == NULL) {
1973                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1974                                 goto next_event;
1975                         }
1976
1977                         if (sample.raw_data == NULL) {
1978                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1979                                        perf_evsel__name(evsel), sample.tid,
1980                                        sample.cpu, sample.raw_size);
1981                                 goto next_event;
1982                         }
1983
1984                         handler = evsel->handler;
1985                         handler(trace, evsel, &sample);
1986 next_event:
1987                         perf_evlist__mmap_consume(evlist, i);
1988
1989                         if (interrupted)
1990                                 goto out_disable;
1991                 }
1992         }
1993
1994         if (trace->nr_events == before) {
1995                 int timeout = done ? 100 : -1;
1996
1997                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1998                         goto again;
1999         } else {
2000                 goto again;
2001         }
2002
2003 out_disable:
2004         perf_evlist__disable(evlist);
2005
2006         if (!err) {
2007                 if (trace->summary)
2008                         trace__fprintf_thread_summary(trace, trace->output);
2009
2010                 if (trace->show_tool_stats) {
2011                         fprintf(trace->output, "Stats:\n "
2012                                                " vfs_getname : %" PRIu64 "\n"
2013                                                " proc_getname: %" PRIu64 "\n",
2014                                 trace->stats.vfs_getname,
2015                                 trace->stats.proc_getname);
2016                 }
2017         }
2018
2019 out_delete_evlist:
2020         perf_evlist__delete(evlist);
2021 out:
2022         trace->live = false;
2023         return err;
2024 {
2025         char errbuf[BUFSIZ];
2026
2027 out_error_tp:
2028         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2029         goto out_error;
2030
2031 out_error_open:
2032         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2033
2034 out_error:
2035         fprintf(trace->output, "%s\n", errbuf);
2036         goto out_delete_evlist;
2037 }
2038 }
2039
2040 static int trace__replay(struct trace *trace)
2041 {
2042         const struct perf_evsel_str_handler handlers[] = {
2043                 { "probe:vfs_getname",       trace__vfs_getname, },
2044         };
2045         struct perf_data_file file = {
2046                 .path  = input_name,
2047                 .mode  = PERF_DATA_MODE_READ,
2048         };
2049         struct perf_session *session;
2050         struct perf_evsel *evsel;
2051         int err = -1;
2052
2053         trace->tool.sample        = trace__process_sample;
2054         trace->tool.mmap          = perf_event__process_mmap;
2055         trace->tool.mmap2         = perf_event__process_mmap2;
2056         trace->tool.comm          = perf_event__process_comm;
2057         trace->tool.exit          = perf_event__process_exit;
2058         trace->tool.fork          = perf_event__process_fork;
2059         trace->tool.attr          = perf_event__process_attr;
2060         trace->tool.tracing_data = perf_event__process_tracing_data;
2061         trace->tool.build_id      = perf_event__process_build_id;
2062
2063         trace->tool.ordered_samples = true;
2064         trace->tool.ordering_requires_timestamps = true;
2065
2066         /* add tid to output */
2067         trace->multiple_threads = true;
2068
2069         if (symbol__init() < 0)
2070                 return -1;
2071
2072         session = perf_session__new(&file, false, &trace->tool);
2073         if (session == NULL)
2074                 return -ENOMEM;
2075
2076         trace->host = &session->machines.host;
2077
2078         err = perf_session__set_tracepoints_handlers(session, handlers);
2079         if (err)
2080                 goto out;
2081
2082         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2083                                                      "raw_syscalls:sys_enter");
2084         /* older kernels have syscalls tp versus raw_syscalls */
2085         if (evsel == NULL)
2086                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2087                                                              "syscalls:sys_enter");
2088         if (evsel == NULL) {
2089                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2090                 goto out;
2091         }
2092
2093         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2094             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2095                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2096                 goto out;
2097         }
2098
2099         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2100                                                      "raw_syscalls:sys_exit");
2101         if (evsel == NULL)
2102                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2103                                                              "syscalls:sys_exit");
2104         if (evsel == NULL) {
2105                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2106                 goto out;
2107         }
2108
2109         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2110             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2111                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2112                 goto out;
2113         }
2114
2115         err = parse_target_str(trace);
2116         if (err != 0)
2117                 goto out;
2118
2119         setup_pager();
2120
2121         err = perf_session__process_events(session, &trace->tool);
2122         if (err)
2123                 pr_err("Failed to process events, error %d", err);
2124
2125         else if (trace->summary)
2126                 trace__fprintf_thread_summary(trace, trace->output);
2127
2128 out:
2129         perf_session__delete(session);
2130
2131         return err;
2132 }
2133
/*
 * Print the heading of the per-thread summary to fp and return the value
 * reported by fprintf() for it.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2142
2143 static size_t thread__dump_stats(struct thread_trace *ttrace,
2144                                  struct trace *trace, FILE *fp)
2145 {
2146         struct stats *stats;
2147         size_t printed = 0;
2148         struct syscall *sc;
2149         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2150
2151         if (inode == NULL)
2152                 return 0;
2153
2154         printed += fprintf(fp, "\n");
2155
2156         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2157         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2158         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2159
2160         /* each int_node is a syscall */
2161         while (inode) {
2162                 stats = inode->priv;
2163                 if (stats) {
2164                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2165                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2166                         double avg = avg_stats(stats);
2167                         double pct;
2168                         u64 n = (u64) stats->n;
2169
2170                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2171                         avg /= NSEC_PER_MSEC;
2172
2173                         sc = &trace->syscalls.table[inode->i];
2174                         printed += fprintf(fp, "   %-15s", sc->name);
2175                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2176                                            n, min, avg);
2177                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2178                 }
2179
2180                 inode = intlist__next(inode);
2181         }
2182
2183         printed += fprintf(fp, "\n\n");
2184
2185         return printed;
2186 }
2187
/*
 * Context handed to the per-thread summary callback
 * (trace__fprintf_one_thread) through its private pointer.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracing session the threads belong to */
	size_t printed;		/* running total of the fprintf() results */
};
2194
2195 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2196 {
2197         struct summary_data *data = priv;
2198         FILE *fp = data->fp;
2199         size_t printed = data->printed;
2200         struct trace *trace = data->trace;
2201         struct thread_trace *ttrace = thread->priv;
2202         double ratio;
2203
2204         if (ttrace == NULL)
2205                 return 0;
2206
2207         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2208
2209         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2210         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2211         printed += fprintf(fp, "%.1f%%", ratio);
2212         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2213         printed += thread__dump_stats(ttrace, trace, fp);
2214
2215         data->printed += printed;
2216
2217         return 0;
2218 }
2219
2220 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2221 {
2222         struct summary_data data = {
2223                 .fp = fp,
2224                 .trace = trace
2225         };
2226         data.printed = trace__fprintf_threads_header(fp);
2227
2228         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2229
2230         return data.printed;
2231 }
2232
2233 static int trace__set_duration(const struct option *opt, const char *str,
2234                                int unset __maybe_unused)
2235 {
2236         struct trace *trace = opt->value;
2237
2238         trace->duration_filter = atof(str);
2239         return 0;
2240 }
2241
2242 static int trace__open_output(struct trace *trace, const char *filename)
2243 {
2244         struct stat st;
2245
2246         if (!stat(filename, &st) && st.st_size) {
2247                 char oldname[PATH_MAX];
2248
2249                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2250                 unlink(oldname);
2251                 rename(filename, oldname);
2252         }
2253
2254         trace->output = fopen(filename, "w");
2255
2256         return trace->output == NULL ? -errno : 0;
2257 }
2258
2259 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2260 {
2261         const char * const trace_usage[] = {
2262                 "perf trace [<options>] [<command>]",
2263                 "perf trace [<options>] -- <command> [<options>]",
2264                 "perf trace record [<options>] [<command>]",
2265                 "perf trace record [<options>] -- <command> [<options>]",
2266                 NULL
2267         };
2268         struct trace trace = {
2269                 .audit = {
2270                         .machine = audit_detect_machine(),
2271                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2272                 },
2273                 .syscalls = {
2274                         . max = -1,
2275                 },
2276                 .opts = {
2277                         .target = {
2278                                 .uid       = UINT_MAX,
2279                                 .uses_mmap = true,
2280                         },
2281                         .user_freq     = UINT_MAX,
2282                         .user_interval = ULLONG_MAX,
2283                         .no_buffering  = true,
2284                         .mmap_pages    = 1024,
2285                 },
2286                 .output = stdout,
2287                 .show_comm = true,
2288         };
2289         const char *output_name = NULL;
2290         const char *ev_qualifier_str = NULL;
2291         const struct option trace_options[] = {
2292         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2293                     "show the thread COMM next to its id"),
2294         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2295         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2296                     "list of events to trace"),
2297         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2298         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2299         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2300                     "trace events on existing process id"),
2301         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2302                     "trace events on existing thread id"),
2303         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2304                     "system-wide collection from all CPUs"),
2305         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2306                     "list of cpus to monitor"),
2307         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2308                     "child tasks do not inherit counters"),
2309         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2310                      "number of mmap data pages",
2311                      perf_evlist__parse_mmap_pages),
2312         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2313                    "user to profile"),
2314         OPT_CALLBACK(0, "duration", &trace, "float",
2315                      "show only events with duration > N.M ms",
2316                      trace__set_duration),
2317         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2318         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2319         OPT_BOOLEAN('T', "time", &trace.full_time,
2320                     "Show full timestamp, not time relative to first start"),
2321         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2322                     "Show only syscall summary with statistics"),
2323         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2324                     "Show all syscalls and summary with statistics"),
2325         OPT_END()
2326         };
2327         int err;
2328         char bf[BUFSIZ];
2329
2330         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2331                 return trace__record(argc-2, &argv[2]);
2332
2333         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2334
2335         /* summary_only implies summary option, but don't overwrite summary if set */
2336         if (trace.summary_only)
2337                 trace.summary = trace.summary_only;
2338
2339         if (output_name != NULL) {
2340                 err = trace__open_output(&trace, output_name);
2341                 if (err < 0) {
2342                         perror("failed to create output file");
2343                         goto out;
2344                 }
2345         }
2346
2347         if (ev_qualifier_str != NULL) {
2348                 const char *s = ev_qualifier_str;
2349
2350                 trace.not_ev_qualifier = *s == '!';
2351                 if (trace.not_ev_qualifier)
2352                         ++s;
2353                 trace.ev_qualifier = strlist__new(true, s);
2354                 if (trace.ev_qualifier == NULL) {
2355                         fputs("Not enough memory to parse event qualifier",
2356                               trace.output);
2357                         err = -ENOMEM;
2358                         goto out_close;
2359                 }
2360         }
2361
2362         err = target__validate(&trace.opts.target);
2363         if (err) {
2364                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2365                 fprintf(trace.output, "%s", bf);
2366                 goto out_close;
2367         }
2368
2369         err = target__parse_uid(&trace.opts.target);
2370         if (err) {
2371                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2372                 fprintf(trace.output, "%s", bf);
2373                 goto out_close;
2374         }
2375
2376         if (!argc && target__none(&trace.opts.target))
2377                 trace.opts.target.system_wide = true;
2378
2379         if (input_name)
2380                 err = trace__replay(&trace);
2381         else
2382                 err = trace__run(&trace, argc, argv);
2383
2384 out_close:
2385         if (output_name != NULL)
2386                 fclose(trace.output);
2387 out:
2388         return err;
2389 }