Pileus Git - ~andy/linux/blob - kernel/trace/trace_uprobe.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[~andy/linux] / kernel / trace / trace_uprobe.c
1 /*
2  * uprobes-based tracing events
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  *
17  * Copyright (C) IBM Corporation, 2010-2012
18  * Author:      Srikar Dronamraju <srikar@linux.vnet.ibm.com>
19  */
20
21 #include <linux/module.h>
22 #include <linux/uaccess.h>
23 #include <linux/uprobes.h>
24 #include <linux/namei.h>
25 #include <linux/string.h>
26
27 #include "trace_probe.h"
28
29 #define UPROBE_EVENT_SYSTEM     "uprobes"
30
/*
 * Fixed head of every uprobe trace record: the common trace entry followed
 * by the probe address words (SIZEOF_TRACE_ENTRY() reserves one word for an
 * entry probe and two for a return probe).  Fetched argument data is stored
 * after those words, at DATAOF_TRACE_ENTRY().
 */
31 struct uprobe_trace_entry_head {
32         struct trace_entry      ent;
33         unsigned long           vaddr[];	/* [0] and, for return probes, [1] */
34 };
35
/*
 * Size in bytes of the fixed part of a trace record: the entry head plus
 * one address word for an entry probe, or two words for a return probe.
 * The argument is parenthesized so that any expression, including one that
 * itself uses ?:, selects the intended word count.
 */
#define SIZEOF_TRACE_ENTRY(is_return)			\
	(sizeof(struct uprobe_trace_entry_head) +	\
	 sizeof(unsigned long) * ((is_return) ? 2 : 1))

/* Start of the fetched-argument area that follows the fixed part of @entry. */
#define DATAOF_TRACE_ENTRY(entry, is_return)		\
	((void *)(entry) + SIZEOF_TRACE_ENTRY(is_return))
42
/*
 * Per-probe perf filter state.  uprobe_perf_open()/uprobe_perf_close()
 * modify it under write_lock(&rwlock); uprobe_perf_filter() reads it under
 * read_lock(&rwlock).
 */
43 struct trace_uprobe_filter {
44         rwlock_t                rwlock;
45         int                     nr_systemwide;	/* perf events opened without a target task */
46         struct list_head        perf_events;	/* perf events with a target task, linked via hw.tp_list */
47 };
48
49 /*
50  * uprobe event core functions
51  */
/* One user-space probe event; allocated with SIZEOF_TRACE_UPROBE(nargs). */
52 struct trace_uprobe {
53         struct list_head                list;	/* link in uprobe_list */
54         struct ftrace_event_class       class;	/* call.class points here */
55         struct ftrace_event_call        call;
56         struct trace_uprobe_filter      filter;	/* perf filter state */
57         struct uprobe_consumer          consumer;	/* handlers passed to uprobe_register() */
58         struct inode                    *inode;	/* probed file; reference dropped in free_trace_uprobe() */
59         char                            *filename;	/* kstrdup'ed path of the probed file */
60         unsigned long                   offset;	/* probe offset parsed from the command */
61         unsigned long                   nhit;	/* incremented in uprobe_dispatcher() */
62         unsigned int                    flags;  /* For TP_FLAG_* */
63         ssize_t                         size;   /* trace entry size */
64         unsigned int                    nr_args;	/* number of initialized entries in args[] */
65         struct probe_arg                args[];
66 };
67
/* Bytes needed for a struct trace_uprobe that carries @n trailing probe_arg slots. */
#define SIZEOF_TRACE_UPROBE(n)	\
	(offsetof(struct trace_uprobe, args) + (n) * sizeof(struct probe_arg))
71
/* Forward declarations; both functions are defined near the end of this file. */
72 static int register_uprobe_event(struct trace_uprobe *tu);
73 static int unregister_uprobe_event(struct trace_uprobe *tu);
74
/* uprobe_list holds every registered trace_uprobe; uprobe_lock is held around its lookups and updates. */
75 static DEFINE_MUTEX(uprobe_lock);
76 static LIST_HEAD(uprobe_list);
77
/* Consumer callbacks installed by alloc_trace_uprobe(). */
78 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
79 static int uretprobe_dispatcher(struct uprobe_consumer *con,
80                                 unsigned long func, struct pt_regs *regs);
81
82 static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
83 {
84         rwlock_init(&filter->rwlock);
85         filter->nr_systemwide = 0;
86         INIT_LIST_HEAD(&filter->perf_events);
87 }
88
89 static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
90 {
91         return !filter->nr_systemwide && list_empty(&filter->perf_events);
92 }
93
94 static inline bool is_ret_probe(struct trace_uprobe *tu)
95 {
96         return tu->consumer.ret_handler != NULL;
97 }
98
99 /*
100  * Allocate new trace_uprobe and initialize it (including uprobes).
101  */
102 static struct trace_uprobe *
103 alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
104 {
105         struct trace_uprobe *tu;
106
107         if (!event || !is_good_name(event))
108                 return ERR_PTR(-EINVAL);
109
110         if (!group || !is_good_name(group))
111                 return ERR_PTR(-EINVAL);
112
113         tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
114         if (!tu)
115                 return ERR_PTR(-ENOMEM);
116
117         tu->call.class = &tu->class;
118         tu->call.name = kstrdup(event, GFP_KERNEL);
119         if (!tu->call.name)
120                 goto error;
121
122         tu->class.system = kstrdup(group, GFP_KERNEL);
123         if (!tu->class.system)
124                 goto error;
125
126         INIT_LIST_HEAD(&tu->list);
127         tu->consumer.handler = uprobe_dispatcher;
128         if (is_ret)
129                 tu->consumer.ret_handler = uretprobe_dispatcher;
130         init_trace_uprobe_filter(&tu->filter);
131         tu->call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER;
132         return tu;
133
134 error:
135         kfree(tu->call.name);
136         kfree(tu);
137
138         return ERR_PTR(-ENOMEM);
139 }
140
141 static void free_trace_uprobe(struct trace_uprobe *tu)
142 {
143         int i;
144
145         for (i = 0; i < tu->nr_args; i++)
146                 traceprobe_free_probe_arg(&tu->args[i]);
147
148         iput(tu->inode);
149         kfree(tu->call.class->system);
150         kfree(tu->call.name);
151         kfree(tu->filename);
152         kfree(tu);
153 }
154
155 static struct trace_uprobe *find_probe_event(const char *event, const char *group)
156 {
157         struct trace_uprobe *tu;
158
159         list_for_each_entry(tu, &uprobe_list, list)
160                 if (strcmp(tu->call.name, event) == 0 &&
161                     strcmp(tu->call.class->system, group) == 0)
162                         return tu;
163
164         return NULL;
165 }
166
167 /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
168 static int unregister_trace_uprobe(struct trace_uprobe *tu)
169 {
170         int ret;
171
172         ret = unregister_uprobe_event(tu);
173         if (ret)
174                 return ret;
175
176         list_del(&tu->list);
177         free_trace_uprobe(tu);
178         return 0;
179 }
180
181 /* Register a trace_uprobe and probe_event */
182 static int register_trace_uprobe(struct trace_uprobe *tu)
183 {
184         struct trace_uprobe *old_tp;
185         int ret;
186
187         mutex_lock(&uprobe_lock);
188
189         /* register as an event */
190         old_tp = find_probe_event(tu->call.name, tu->call.class->system);
191         if (old_tp) {
192                 /* delete old event */
193                 ret = unregister_trace_uprobe(old_tp);
194                 if (ret)
195                         goto end;
196         }
197
198         ret = register_uprobe_event(tu);
199         if (ret) {
200                 pr_warning("Failed to register probe event(%d)\n", ret);
201                 goto end;
202         }
203
204         list_add_tail(&tu->list, &uprobe_list);
205
206 end:
207         mutex_unlock(&uprobe_lock);
208
209         return ret;
210 }
211
212 /*
213  * Argument syntax:
214  *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
215  *
216  *  - Remove uprobe: -:[GRP/]EVENT
217  */
218 static int create_trace_uprobe(int argc, char **argv)
219 {
220         struct trace_uprobe *tu;
221         struct inode *inode;
222         char *arg, *event, *group, *filename;
223         char buf[MAX_EVENT_NAME_LEN];
224         struct path path;
225         unsigned long offset;
226         bool is_delete, is_return;
227         int i, ret;
228
229         inode = NULL;
230         ret = 0;
231         is_delete = false;
232         is_return = false;
233         event = NULL;
234         group = NULL;
235
236         /* argc must be >= 1 */
237         if (argv[0][0] == '-')
238                 is_delete = true;
239         else if (argv[0][0] == 'r')
240                 is_return = true;
241         else if (argv[0][0] != 'p') {
242                 pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
243                 return -EINVAL;
244         }
245
246         if (argv[0][1] == ':') {
247                 event = &argv[0][2];
248                 arg = strchr(event, '/');
249
250                 if (arg) {
251                         group = event;
252                         event = arg + 1;
253                         event[-1] = '\0';
254
255                         if (strlen(group) == 0) {
256                                 pr_info("Group name is not specified\n");
257                                 return -EINVAL;
258                         }
259                 }
260                 if (strlen(event) == 0) {
261                         pr_info("Event name is not specified\n");
262                         return -EINVAL;
263                 }
264         }
265         if (!group)
266                 group = UPROBE_EVENT_SYSTEM;
267
268         if (is_delete) {
269                 int ret;
270
271                 if (!event) {
272                         pr_info("Delete command needs an event name.\n");
273                         return -EINVAL;
274                 }
275                 mutex_lock(&uprobe_lock);
276                 tu = find_probe_event(event, group);
277
278                 if (!tu) {
279                         mutex_unlock(&uprobe_lock);
280                         pr_info("Event %s/%s doesn't exist.\n", group, event);
281                         return -ENOENT;
282                 }
283                 /* delete an event */
284                 ret = unregister_trace_uprobe(tu);
285                 mutex_unlock(&uprobe_lock);
286                 return ret;
287         }
288
289         if (argc < 2) {
290                 pr_info("Probe point is not specified.\n");
291                 return -EINVAL;
292         }
293         if (isdigit(argv[1][0])) {
294                 pr_info("probe point must be have a filename.\n");
295                 return -EINVAL;
296         }
297         arg = strchr(argv[1], ':');
298         if (!arg) {
299                 ret = -EINVAL;
300                 goto fail_address_parse;
301         }
302
303         *arg++ = '\0';
304         filename = argv[1];
305         ret = kern_path(filename, LOOKUP_FOLLOW, &path);
306         if (ret)
307                 goto fail_address_parse;
308
309         inode = igrab(path.dentry->d_inode);
310         path_put(&path);
311
312         if (!inode || !S_ISREG(inode->i_mode)) {
313                 ret = -EINVAL;
314                 goto fail_address_parse;
315         }
316
317         ret = kstrtoul(arg, 0, &offset);
318         if (ret)
319                 goto fail_address_parse;
320
321         argc -= 2;
322         argv += 2;
323
324         /* setup a probe */
325         if (!event) {
326                 char *tail;
327                 char *ptr;
328
329                 tail = kstrdup(kbasename(filename), GFP_KERNEL);
330                 if (!tail) {
331                         ret = -ENOMEM;
332                         goto fail_address_parse;
333                 }
334
335                 ptr = strpbrk(tail, ".-_");
336                 if (ptr)
337                         *ptr = '\0';
338
339                 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
340                 event = buf;
341                 kfree(tail);
342         }
343
344         tu = alloc_trace_uprobe(group, event, argc, is_return);
345         if (IS_ERR(tu)) {
346                 pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
347                 ret = PTR_ERR(tu);
348                 goto fail_address_parse;
349         }
350         tu->offset = offset;
351         tu->inode = inode;
352         tu->filename = kstrdup(filename, GFP_KERNEL);
353
354         if (!tu->filename) {
355                 pr_info("Failed to allocate filename.\n");
356                 ret = -ENOMEM;
357                 goto error;
358         }
359
360         /* parse arguments */
361         ret = 0;
362         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
363                 /* Increment count for freeing args in error case */
364                 tu->nr_args++;
365
366                 /* Parse argument name */
367                 arg = strchr(argv[i], '=');
368                 if (arg) {
369                         *arg++ = '\0';
370                         tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
371                 } else {
372                         arg = argv[i];
373                         /* If argument name is omitted, set "argN" */
374                         snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
375                         tu->args[i].name = kstrdup(buf, GFP_KERNEL);
376                 }
377
378                 if (!tu->args[i].name) {
379                         pr_info("Failed to allocate argument[%d] name.\n", i);
380                         ret = -ENOMEM;
381                         goto error;
382                 }
383
384                 if (!is_good_name(tu->args[i].name)) {
385                         pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
386                         ret = -EINVAL;
387                         goto error;
388                 }
389
390                 if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
391                         pr_info("Argument[%d] name '%s' conflicts with "
392                                 "another field.\n", i, argv[i]);
393                         ret = -EINVAL;
394                         goto error;
395                 }
396
397                 /* Parse fetch argument */
398                 ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
399                 if (ret) {
400                         pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
401                         goto error;
402                 }
403         }
404
405         ret = register_trace_uprobe(tu);
406         if (ret)
407                 goto error;
408         return 0;
409
410 error:
411         free_trace_uprobe(tu);
412         return ret;
413
414 fail_address_parse:
415         if (inode)
416                 iput(inode);
417
418         pr_info("Failed to parse address or file.\n");
419
420         return ret;
421 }
422
423 static int cleanup_all_probes(void)
424 {
425         struct trace_uprobe *tu;
426         int ret = 0;
427
428         mutex_lock(&uprobe_lock);
429         while (!list_empty(&uprobe_list)) {
430                 tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
431                 ret = unregister_trace_uprobe(tu);
432                 if (ret)
433                         break;
434         }
435         mutex_unlock(&uprobe_lock);
436         return ret;
437 }
438
439 /* Probes listing interfaces */
440 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
441 {
442         mutex_lock(&uprobe_lock);
443         return seq_list_start(&uprobe_list, *pos);
444 }
445
446 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
447 {
448         return seq_list_next(v, &uprobe_list, pos);
449 }
450
451 static void probes_seq_stop(struct seq_file *m, void *v)
452 {
453         mutex_unlock(&uprobe_lock);
454 }
455
456 static int probes_seq_show(struct seq_file *m, void *v)
457 {
458         struct trace_uprobe *tu = v;
459         char c = is_ret_probe(tu) ? 'r' : 'p';
460         int i;
461
462         seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
463         seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
464
465         for (i = 0; i < tu->nr_args; i++)
466                 seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);
467
468         seq_printf(m, "\n");
469         return 0;
470 }
471
472 static const struct seq_operations probes_seq_op = {
473         .start  = probes_seq_start,
474         .next   = probes_seq_next,
475         .stop   = probes_seq_stop,
476         .show   = probes_seq_show
477 };
478
479 static int probes_open(struct inode *inode, struct file *file)
480 {
481         int ret;
482
483         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
484                 ret = cleanup_all_probes();
485                 if (ret)
486                         return ret;
487         }
488
489         return seq_open(file, &probes_seq_op);
490 }
491
492 static ssize_t probes_write(struct file *file, const char __user *buffer,
493                             size_t count, loff_t *ppos)
494 {
495         return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
496 }
497
498 static const struct file_operations uprobe_events_ops = {
499         .owner          = THIS_MODULE,
500         .open           = probes_open,
501         .read           = seq_read,
502         .llseek         = seq_lseek,
503         .release        = seq_release,
504         .write          = probes_write,
505 };
506
507 /* Probes profiling interfaces */
508 static int probes_profile_seq_show(struct seq_file *m, void *v)
509 {
510         struct trace_uprobe *tu = v;
511
512         seq_printf(m, "  %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit);
513         return 0;
514 }
515
516 static const struct seq_operations profile_seq_op = {
517         .start  = probes_seq_start,
518         .next   = probes_seq_next,
519         .stop   = probes_seq_stop,
520         .show   = probes_profile_seq_show
521 };
522
523 static int profile_open(struct inode *inode, struct file *file)
524 {
525         return seq_open(file, &profile_seq_op);
526 }
527
528 static const struct file_operations uprobe_profile_ops = {
529         .owner          = THIS_MODULE,
530         .open           = profile_open,
531         .read           = seq_read,
532         .llseek         = seq_lseek,
533         .release        = seq_release,
534 };
535
536 static void uprobe_trace_print(struct trace_uprobe *tu,
537                                 unsigned long func, struct pt_regs *regs)
538 {
539         struct uprobe_trace_entry_head *entry;
540         struct ring_buffer_event *event;
541         struct ring_buffer *buffer;
542         void *data;
543         int size, i;
544         struct ftrace_event_call *call = &tu->call;
545
546         size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
547         event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
548                                                   size + tu->size, 0, 0);
549         if (!event)
550                 return;
551
552         entry = ring_buffer_event_data(event);
553         if (is_ret_probe(tu)) {
554                 entry->vaddr[0] = func;
555                 entry->vaddr[1] = instruction_pointer(regs);
556                 data = DATAOF_TRACE_ENTRY(entry, true);
557         } else {
558                 entry->vaddr[0] = instruction_pointer(regs);
559                 data = DATAOF_TRACE_ENTRY(entry, false);
560         }
561
562         for (i = 0; i < tu->nr_args; i++)
563                 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
564
565         if (!call_filter_check_discard(call, entry, buffer, event))
566                 trace_buffer_unlock_commit(buffer, event, 0, 0);
567 }
568
/*
 * uprobe handler for tracing: record the hit unless @tu is a return probe
 * (those are recorded from uretprobe_trace_func()).  Always returns 0.
 */
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
	if (is_ret_probe(tu))
		return 0;

	uprobe_trace_print(tu, 0, regs);
	return 0;
}
576
/* Return-probe handler for tracing: record the return from @func. */
static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
				struct pt_regs *regs)
{
	uprobe_trace_print(tu, func, regs);
}
582
583 /* Event entry printers */
584 static enum print_line_t
585 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
586 {
587         struct uprobe_trace_entry_head *entry;
588         struct trace_seq *s = &iter->seq;
589         struct trace_uprobe *tu;
590         u8 *data;
591         int i;
592
593         entry = (struct uprobe_trace_entry_head *)iter->ent;
594         tu = container_of(event, struct trace_uprobe, call.event);
595
596         if (is_ret_probe(tu)) {
597                 if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
598                                         entry->vaddr[1], entry->vaddr[0]))
599                         goto partial;
600                 data = DATAOF_TRACE_ENTRY(entry, true);
601         } else {
602                 if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
603                                         entry->vaddr[0]))
604                         goto partial;
605                 data = DATAOF_TRACE_ENTRY(entry, false);
606         }
607
608         for (i = 0; i < tu->nr_args; i++) {
609                 if (!tu->args[i].type->print(s, tu->args[i].name,
610                                              data + tu->args[i].offset, entry))
611                         goto partial;
612         }
613
614         if (trace_seq_puts(s, "\n"))
615                 return TRACE_TYPE_HANDLED;
616
617 partial:
618         return TRACE_TYPE_PARTIAL_LINE;
619 }
620
621 static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
622 {
623         return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
624 }
625
/*
 * Signature of the filter callback that probe_event_enable() stores in
 * tu->consumer.filter; uprobe_perf_filter() is the implementation used in
 * this file.
 */
626 typedef bool (*filter_func_t)(struct uprobe_consumer *self,
627                                 enum uprobe_filter_ctx ctx,
628                                 struct mm_struct *mm);
629
630 static int
631 probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
632 {
633         int ret = 0;
634
635         if (is_trace_uprobe_enabled(tu))
636                 return -EINTR;
637
638         WARN_ON(!uprobe_filter_is_empty(&tu->filter));
639
640         tu->flags |= flag;
641         tu->consumer.filter = filter;
642         ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
643         if (ret)
644                 tu->flags &= ~flag;
645
646         return ret;
647 }
648
649 static void probe_event_disable(struct trace_uprobe *tu, int flag)
650 {
651         if (!is_trace_uprobe_enabled(tu))
652                 return;
653
654         WARN_ON(!uprobe_filter_is_empty(&tu->filter));
655
656         uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
657         tu->flags &= ~flag;
658 }
659
/*
 * define_fields callback (installed in register_uprobe_event()): describe
 * the record layout of this probe to the event core - first the address
 * word(s), then one field per fetch argument.
 *
 * Returns 0 on success or the first error from trace_define_field().
 *
 * NOTE(review): DEFINE_FIELD() is defined outside this file.  `field` is
 * not referenced anywhere else in this function, so the macro presumably
 * uses it (and likely `ret`/`event_call`, returning on error) - confirm in
 * trace_probe.h before renaming any of these locals.
 */
660 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
661 {
662         int ret, i, size;
663         struct uprobe_trace_entry_head field;
664         struct trace_uprobe *tu = event_call->data;
665
666         if (is_ret_probe(tu)) {
667                 DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
668                 DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
669                 size = SIZEOF_TRACE_ENTRY(true);
670         } else {
671                 DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
672                 size = SIZEOF_TRACE_ENTRY(false);
673         }
674         /* Set argument names as fields */
675         for (i = 0; i < tu->nr_args; i++) {
			/* argument offsets are relative to the data area, so add the fixed-part size */
676                 ret = trace_define_field(event_call, tu->args[i].type->fmttype,
677                                          tu->args[i].name,
678                                          size + tu->args[i].offset,
679                                          tu->args[i].type->size,
680                                          tu->args[i].type->is_signed,
681                                          FILTER_OTHER);
682
683                 if (ret)
684                         return ret;
685         }
686         return 0;
687 }
688
689 #define LEN_OR_ZERO             (len ? len - pos : 0)
690 static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
691 {
692         const char *fmt, *arg;
693         int i;
694         int pos = 0;
695
696         if (is_ret_probe(tu)) {
697                 fmt = "(%lx <- %lx)";
698                 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
699         } else {
700                 fmt = "(%lx)";
701                 arg = "REC->" FIELD_STRING_IP;
702         }
703
704         /* When len=0, we just calculate the needed length */
705
706         pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
707
708         for (i = 0; i < tu->nr_args; i++) {
709                 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
710                                 tu->args[i].name, tu->args[i].type->fmt);
711         }
712
713         pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
714
715         for (i = 0; i < tu->nr_args; i++) {
716                 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
717                                 tu->args[i].name);
718         }
719
720         return pos;     /* return the length of print_fmt */
721 }
722 #undef LEN_OR_ZERO
723
724 static int set_print_fmt(struct trace_uprobe *tu)
725 {
726         char *print_fmt;
727         int len;
728
729         /* First: called with 0 length to calculate the needed length */
730         len = __set_print_fmt(tu, NULL, 0);
731         print_fmt = kmalloc(len + 1, GFP_KERNEL);
732         if (!print_fmt)
733                 return -ENOMEM;
734
735         /* Second: actually write the @print_fmt */
736         __set_print_fmt(tu, print_fmt, len + 1);
737         tu->call.print_fmt = print_fmt;
738
739         return 0;
740 }
741
742 #ifdef CONFIG_PERF_EVENTS
743 static bool
744 __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
745 {
746         struct perf_event *event;
747
748         if (filter->nr_systemwide)
749                 return true;
750
751         list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
752                 if (event->hw.tp_target->mm == mm)
753                         return true;
754         }
755
756         return false;
757 }
758
759 static inline bool
760 uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
761 {
762         return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
763 }
764
765 static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
766 {
767         bool done;
768
769         write_lock(&tu->filter.rwlock);
770         if (event->hw.tp_target) {
771                 /*
772                  * event->parent != NULL means copy_process(), we can avoid
773                  * uprobe_apply(). current->mm must be probed and we can rely
774                  * on dup_mmap() which preserves the already installed bp's.
775                  *
776                  * attr.enable_on_exec means that exec/mmap will install the
777                  * breakpoints we need.
778                  */
779                 done = tu->filter.nr_systemwide ||
780                         event->parent || event->attr.enable_on_exec ||
781                         uprobe_filter_event(tu, event);
782                 list_add(&event->hw.tp_list, &tu->filter.perf_events);
783         } else {
784                 done = tu->filter.nr_systemwide;
785                 tu->filter.nr_systemwide++;
786         }
787         write_unlock(&tu->filter.rwlock);
788
789         if (!done)
790                 uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
791
792         return 0;
793 }
794
795 static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
796 {
797         bool done;
798
799         write_lock(&tu->filter.rwlock);
800         if (event->hw.tp_target) {
801                 list_del(&event->hw.tp_list);
802                 done = tu->filter.nr_systemwide ||
803                         (event->hw.tp_target->flags & PF_EXITING) ||
804                         uprobe_filter_event(tu, event);
805         } else {
806                 tu->filter.nr_systemwide--;
807                 done = tu->filter.nr_systemwide;
808         }
809         write_unlock(&tu->filter.rwlock);
810
811         if (!done)
812                 uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
813
814         return 0;
815 }
816
817 static bool uprobe_perf_filter(struct uprobe_consumer *uc,
818                                 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
819 {
820         struct trace_uprobe *tu;
821         int ret;
822
823         tu = container_of(uc, struct trace_uprobe, consumer);
824         read_lock(&tu->filter.rwlock);
825         ret = __uprobe_perf_filter(&tu->filter, mm);
826         read_unlock(&tu->filter.rwlock);
827
828         return ret;
829 }
830
831 static void uprobe_perf_print(struct trace_uprobe *tu,
832                                 unsigned long func, struct pt_regs *regs)
833 {
834         struct ftrace_event_call *call = &tu->call;
835         struct uprobe_trace_entry_head *entry;
836         struct hlist_head *head;
837         void *data;
838         int size, rctx, i;
839
840         size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
841         size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
842
843         preempt_disable();
844         head = this_cpu_ptr(call->perf_events);
845         if (hlist_empty(head))
846                 goto out;
847
848         entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
849         if (!entry)
850                 goto out;
851
852         if (is_ret_probe(tu)) {
853                 entry->vaddr[0] = func;
854                 entry->vaddr[1] = instruction_pointer(regs);
855                 data = DATAOF_TRACE_ENTRY(entry, true);
856         } else {
857                 entry->vaddr[0] = instruction_pointer(regs);
858                 data = DATAOF_TRACE_ENTRY(entry, false);
859         }
860
861         for (i = 0; i < tu->nr_args; i++)
862                 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
863
864         perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
865  out:
866         preempt_enable();
867 }
868
869 /* uprobe profile handler */
870 static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
871 {
872         if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
873                 return UPROBE_HANDLER_REMOVE;
874
875         if (!is_ret_probe(tu))
876                 uprobe_perf_print(tu, 0, regs);
877         return 0;
878 }
879
/* Return-probe profile handler: submit the return from @func to perf. */
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
				struct pt_regs *regs)
{
	uprobe_perf_print(tu, func, regs);
}
885 #endif  /* CONFIG_PERF_EVENTS */
886
887 static
888 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
889 {
890         struct trace_uprobe *tu = event->data;
891
892         switch (type) {
893         case TRACE_REG_REGISTER:
894                 return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
895
896         case TRACE_REG_UNREGISTER:
897                 probe_event_disable(tu, TP_FLAG_TRACE);
898                 return 0;
899
900 #ifdef CONFIG_PERF_EVENTS
901         case TRACE_REG_PERF_REGISTER:
902                 return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
903
904         case TRACE_REG_PERF_UNREGISTER:
905                 probe_event_disable(tu, TP_FLAG_PROFILE);
906                 return 0;
907
908         case TRACE_REG_PERF_OPEN:
909                 return uprobe_perf_open(tu, data);
910
911         case TRACE_REG_PERF_CLOSE:
912                 return uprobe_perf_close(tu, data);
913
914 #endif
915         default:
916                 return 0;
917         }
918         return 0;
919 }
920
921 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
922 {
923         struct trace_uprobe *tu;
924         int ret = 0;
925
926         tu = container_of(con, struct trace_uprobe, consumer);
927         tu->nhit++;
928
929         if (tu->flags & TP_FLAG_TRACE)
930                 ret |= uprobe_trace_func(tu, regs);
931
932 #ifdef CONFIG_PERF_EVENTS
933         if (tu->flags & TP_FLAG_PROFILE)
934                 ret |= uprobe_perf_func(tu, regs);
935 #endif
936         return ret;
937 }
938
939 static int uretprobe_dispatcher(struct uprobe_consumer *con,
940                                 unsigned long func, struct pt_regs *regs)
941 {
942         struct trace_uprobe *tu;
943
944         tu = container_of(con, struct trace_uprobe, consumer);
945
946         if (tu->flags & TP_FLAG_TRACE)
947                 uretprobe_trace_func(tu, func, regs);
948
949 #ifdef CONFIG_PERF_EVENTS
950         if (tu->flags & TP_FLAG_PROFILE)
951                 uretprobe_perf_func(tu, func, regs);
952 #endif
953         return 0;
954 }
955
956 static struct trace_event_functions uprobe_funcs = {
957         .trace          = print_uprobe_event
958 };
959
960 static int register_uprobe_event(struct trace_uprobe *tu)
961 {
962         struct ftrace_event_call *call = &tu->call;
963         int ret;
964
965         /* Initialize ftrace_event_call */
966         INIT_LIST_HEAD(&call->class->fields);
967         call->event.funcs = &uprobe_funcs;
968         call->class->define_fields = uprobe_event_define_fields;
969
970         if (set_print_fmt(tu) < 0)
971                 return -ENOMEM;
972
973         ret = register_ftrace_event(&call->event);
974         if (!ret) {
975                 kfree(call->print_fmt);
976                 return -ENODEV;
977         }
978         call->flags = 0;
979         call->class->reg = trace_uprobe_register;
980         call->data = tu;
981         ret = trace_add_event_call(call);
982
983         if (ret) {
984                 pr_info("Failed to register uprobe event: %s\n", call->name);
985                 kfree(call->print_fmt);
986                 unregister_ftrace_event(&call->event);
987         }
988
989         return ret;
990 }
991
992 static int unregister_uprobe_event(struct trace_uprobe *tu)
993 {
994         int ret;
995
996         /* tu->event is unregistered in trace_remove_event_call() */
997         ret = trace_remove_event_call(&tu->call);
998         if (ret)
999                 return ret;
1000         kfree(tu->call.print_fmt);
1001         tu->call.print_fmt = NULL;
1002         return 0;
1003 }
1004
1005 /* Make a trace interface for controling probe points */
1006 static __init int init_uprobe_trace(void)
1007 {
1008         struct dentry *d_tracer;
1009
1010         d_tracer = tracing_init_dentry();
1011         if (!d_tracer)
1012                 return 0;
1013
1014         trace_create_file("uprobe_events", 0644, d_tracer,
1015                                     NULL, &uprobe_events_ops);
1016         /* Profile interface */
1017         trace_create_file("uprobe_profile", 0444, d_tracer,
1018                                     NULL, &uprobe_profile_ops);
1019         return 0;
1020 }
1021
1022 fs_initcall(init_uprobe_trace);