From 0bc11ed5ab60c135aa764a62c02cd5ea68289de4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 25 Jul 2019 15:24:37 +0900 Subject: [PATCH 01/33] kprobes: Allow kprobes coexist with livepatch Allow kprobes which do not modify regs->ip to coexist with livepatch by dropping FTRACE_OPS_FL_IPMODIFY from ftrace_ops. A user who wants to modify regs->ip (e.g. function fault injection) must set a dummy post_handler on its kprobes when registering. However, if such a regs->ip-modifying kprobe is set on a function, that function cannot be livepatched. Link: http://lkml.kernel.org/r/156403587671.30117.5233558741694155985.stgit@devnote2 Acked-by: Joe Lawrence Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 56 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d9770a5393c8..f57deec96ba1 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -961,9 +961,16 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) #ifdef CONFIG_KPROBES_ON_FTRACE static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { + .func = kprobe_ftrace_handler, + .flags = FTRACE_OPS_FL_SAVE_REGS, +}; + +static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = { .func = kprobe_ftrace_handler, .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY, }; + +static int kprobe_ipmodify_enabled; static int kprobe_ftrace_enabled; /* Must ensure p->addr is really on ftrace */ @@ -976,58 +983,75 @@ static int prepare_kprobe(struct kprobe *p) } /* Caller must lock kprobe_mutex */ -static int arm_kprobe_ftrace(struct kprobe *p) +static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, + int *cnt) { int ret = 0; - ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, - (unsigned long)p->addr, 0, 0); + ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); if (ret) { pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n", p->addr, ret); 
return ret; } - if (kprobe_ftrace_enabled == 0) { - ret = register_ftrace_function(&kprobe_ftrace_ops); + if (*cnt == 0) { + ret = register_ftrace_function(ops); if (ret) { pr_debug("Failed to init kprobe-ftrace (%d)\n", ret); goto err_ftrace; } } - kprobe_ftrace_enabled++; + (*cnt)++; return ret; err_ftrace: /* - * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a - * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental - * empty filter_hash which would undesirably trace all functions. + * At this point, since ops is not registered, we should be safe from + * registering an empty filter. */ - ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0); + ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); return ret; } +static int arm_kprobe_ftrace(struct kprobe *p) +{ + bool ipmodify = (p->post_handler != NULL); + + return __arm_kprobe_ftrace(p, + ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops, + ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); +} + /* Caller must lock kprobe_mutex */ -static int disarm_kprobe_ftrace(struct kprobe *p) +static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, + int *cnt) { int ret = 0; - if (kprobe_ftrace_enabled == 1) { - ret = unregister_ftrace_function(&kprobe_ftrace_ops); + if (*cnt == 1) { + ret = unregister_ftrace_function(ops); if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret)) return ret; } - kprobe_ftrace_enabled--; + (*cnt)--; - ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, - (unsigned long)p->addr, 1, 0); + ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n", p->addr, ret); return ret; } + +static int disarm_kprobe_ftrace(struct kprobe *p) +{ + bool ipmodify = (p->post_handler != NULL); + + return __disarm_kprobe_ftrace(p, + ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops, + ipmodify ? 
&kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); +} #else /* !CONFIG_KPROBES_ON_FTRACE */ #define prepare_kprobe(p) arch_prepare_kprobe(p) #define arm_kprobe_ftrace(p) (-ENODEV) From 1bd95be204aa3c9456e950d5ab36588dbdbb05f1 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 24 Jul 2019 14:04:55 -0700 Subject: [PATCH 02/33] recordmcount: Remove redundant strcmp The strcmp is unnecessary since .text is already accepted as a prefix in the strncmp(). Link: http://lkml.kernel.org/r/358e590b49adbe4185e161a8b364e323f3d52857.1563992889.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 8387a9bc064a..ebe98c39f3cd 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -405,8 +405,7 @@ is_mcounted_section_name(char const *const txtname) strcmp(".irqentry.text", txtname) == 0 || strcmp(".softirqentry.text", txtname) == 0 || strcmp(".kprobes.text", txtname) == 0 || - strcmp(".cpuidle.text", txtname) == 0 || - strcmp(".text.unlikely", txtname) == 0; + strcmp(".cpuidle.text", txtname) == 0; } /* 32 bit and 64 bit are very similar */ From a146207916092a49bd01ef861ac9582fd318c21a Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 24 Jul 2019 14:04:56 -0700 Subject: [PATCH 03/33] recordmcount: Remove uread() uread() is only used to initialize the ELF file's pseudo private-memory mapping while uwrite() and ulseek() work within the pseudo-mapping and extend it as necessary. Thus it is not a complementary function to uwrite() and ulseek(). It also makes no sense to do cleanups inside uread() when its only caller, mmap_file(), is doing the relevant allocations and associated initializations. Therefore it's clearer to use a plain read() call to initialize the data in mmap_file() and remove uread(). 
Link: http://lkml.kernel.org/r/31a87c22b19150cec1c8dc800c8b0873a2741703.1563992889.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index ebe98c39f3cd..c0dd46344063 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -89,7 +89,7 @@ succeed_file(void) longjmp(jmpenv, SJ_SUCCEED); } -/* ulseek, uread, ...: Check return value for errors. */ +/* ulseek, uwrite, ...: Check return value for errors. */ static off_t ulseek(int const fd, off_t const offset, int const whence) @@ -112,17 +112,6 @@ ulseek(int const fd, off_t const offset, int const whence) return file_ptr - file_map; } -static size_t -uread(int const fd, void *const buf, size_t const count) -{ - size_t const n = read(fd, buf, count); - if (n != count) { - perror("read"); - fail_file(); - } - return n; -} - static size_t uwrite(int const fd, void const *const buf, size_t const count) { @@ -298,7 +287,10 @@ static void *mmap_file(char const *fname) if (file_map == MAP_FAILED) { mmap_failed = 1; file_map = umalloc(sb.st_size); - uread(fd_map, file_map, sb.st_size); + if (read(fd_map, file_map, sb.st_size) != sb.st_size) { + perror(fname); + fail_file(); + } } close(fd_map); From 17e262e9954bc3b6c857670f60b5b015f53509a7 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 24 Jul 2019 14:04:57 -0700 Subject: [PATCH 04/33] recordmcount: Remove unused fd from uwrite() and ulseek() uwrite() works within the pseudo-mapping and extends it as necessary without needing the file descriptor (fd) parameter passed to it. Similarly, ulseek() doesn't need its fd parameter. These parameters were only added because the functions bear a conceptual resemblance to write() and lseek(). Worse, they obscure the fact that at the time uwrite() and ulseek() are called fd_map is not a valid file descriptor. 
Remove the unused file descriptor parameters that make it look like fd_map is still valid. Link: http://lkml.kernel.org/r/2a136e820ee208469d375265c7b8eb28570749a0.1563992889.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 16 ++++++++-------- scripts/recordmcount.h | 26 +++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index c0dd46344063..1fe5fba99959 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -92,7 +92,7 @@ succeed_file(void) /* ulseek, uwrite, ...: Check return value for errors. */ static off_t -ulseek(int const fd, off_t const offset, int const whence) +ulseek(off_t const offset, int const whence) { switch (whence) { case SEEK_SET: @@ -113,7 +113,7 @@ ulseek(int const fd, off_t const offset, int const whence) } static size_t -uwrite(int const fd, void const *const buf, size_t const count) +uwrite(void const *const buf, size_t const count) { size_t cnt = count; off_t idx = 0; @@ -183,8 +183,8 @@ static int make_nop_x86(void *map, size_t const offset) return -1; /* convert to nop */ - ulseek(fd_map, offset - 1, SEEK_SET); - uwrite(fd_map, ideal_nop, 5); + ulseek(offset - 1, SEEK_SET); + uwrite(ideal_nop, 5); return 0; } @@ -232,10 +232,10 @@ static int make_nop_arm(void *map, size_t const offset) return -1; /* Convert to nop */ - ulseek(fd_map, off, SEEK_SET); + ulseek(off, SEEK_SET); do { - uwrite(fd_map, ideal_nop, nop_size); + uwrite(ideal_nop, nop_size); } while (--cnt > 0); return 0; @@ -252,8 +252,8 @@ static int make_nop_arm64(void *map, size_t const offset) return -1; /* Convert to nop */ - ulseek(fd_map, offset, SEEK_SET); - uwrite(fd_map, ideal_nop, 4); + ulseek(offset, SEEK_SET); + uwrite(ideal_nop, 4); return 0; } diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 47fca2c69a73..c1e1b04b4871 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h 
@@ -202,14 +202,14 @@ static void append_func(Elf_Ehdr *const ehdr, new_e_shoff = t; /* body for new shstrtab */ - ulseek(fd_map, sb.st_size, SEEK_SET); - uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size); - uwrite(fd_map, mc_name, 1 + strlen(mc_name)); + ulseek(sb.st_size, SEEK_SET); + uwrite(old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size); + uwrite(mc_name, 1 + strlen(mc_name)); /* old(modified) Elf_Shdr table, word-byte aligned */ - ulseek(fd_map, t, SEEK_SET); + ulseek(t, SEEK_SET); t += sizeof(Elf_Shdr) * old_shnum; - uwrite(fd_map, old_shoff + (void *)ehdr, + uwrite(old_shoff + (void *)ehdr, sizeof(Elf_Shdr) * old_shnum); /* new sections __mcount_loc and .rel__mcount_loc */ @@ -225,7 +225,7 @@ static void append_func(Elf_Ehdr *const ehdr, mcsec.sh_info = 0; mcsec.sh_addralign = _w(_size); mcsec.sh_entsize = _w(_size); - uwrite(fd_map, &mcsec, sizeof(mcsec)); + uwrite(&mcsec, sizeof(mcsec)); mcsec.sh_name = w(old_shstr_sh_size); mcsec.sh_type = (sizeof(Elf_Rela) == rel_entsize) @@ -239,15 +239,15 @@ static void append_func(Elf_Ehdr *const ehdr, mcsec.sh_info = w(old_shnum); mcsec.sh_addralign = _w(_size); mcsec.sh_entsize = _w(rel_entsize); - uwrite(fd_map, &mcsec, sizeof(mcsec)); + uwrite(&mcsec, sizeof(mcsec)); - uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0); - uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0); + uwrite(mloc0, (void *)mlocp - (void *)mloc0); + uwrite(mrel0, (void *)mrelp - (void *)mrel0); ehdr->e_shoff = _w(new_e_shoff); ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */ - ulseek(fd_map, 0, SEEK_SET); - uwrite(fd_map, ehdr, sizeof(*ehdr)); + ulseek(0, SEEK_SET); + uwrite(ehdr, sizeof(*ehdr)); } static unsigned get_mcountsym(Elf_Sym const *const sym0, @@ -396,8 +396,8 @@ static void nop_mcount(Elf_Shdr const *const relhdr, Elf_Rel rel; rel = *(Elf_Rel *)relp; Elf_r_info(&rel, Elf_r_sym(relp), rel_type_nop); - ulseek(fd_map, (void *)relp - (void *)ehdr, SEEK_SET); - uwrite(fd_map, &rel, 
sizeof(rel)); + ulseek((void *)relp - (void *)ehdr, SEEK_SET); + uwrite(&rel, sizeof(rel)); } relp = (Elf_Rel const *)(rel_entsize + (void *)relp); } From 60d53e2c3b75e79c83970fe73db79123d9462c7c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:20 +0900 Subject: [PATCH 05/33] tracing/probe: Split trace_event related data from trace_probe Split the trace_event related data from trace_probe data structure and introduce trace_probe_event data structure as its holder. A trace_probe_event data structure can have multiple trace_probes. Link: http://lkml.kernel.org/r/156095683995.28024.7552150340561557873.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 157 +++++++++++++++++++++++++--------- kernel/trace/trace_probe.c | 54 ++++++++---- kernel/trace/trace_probe.h | 48 ++++++++--- kernel/trace/trace_uprobe.c | 165 +++++++++++++++++++++++++++--------- 4 files changed, 311 insertions(+), 113 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9d483ad9bb6c..eac6344a2e7c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -180,20 +180,33 @@ unsigned long trace_kprobe_address(struct trace_kprobe *tk) return addr; } +static nokprobe_inline struct trace_kprobe * +trace_kprobe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return NULL; + + return container_of(tp, struct trace_kprobe, tp); +} + bool trace_kprobe_on_func_entry(struct trace_event_call *call) { - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + struct trace_kprobe *tk = trace_kprobe_primary_from_call(call); - return kprobe_on_func_entry(tk->rp.kp.addr, + return tk ? kprobe_on_func_entry(tk->rp.kp.addr, tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, - tk->rp.kp.addr ? 0 : tk->rp.kp.offset); + tk->rp.kp.addr ? 
0 : tk->rp.kp.offset) : false; } bool trace_kprobe_error_injectable(struct trace_event_call *call) { - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + struct trace_kprobe *tk = trace_kprobe_primary_from_call(call); - return within_error_injection_list(trace_kprobe_address(tk)); + return tk ? within_error_injection_list(trace_kprobe_address(tk)) : + false; } static int register_kprobe_event(struct trace_kprobe *tk); @@ -291,32 +304,75 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) return ret; } +static void __disable_trace_kprobe(struct trace_probe *tp) +{ + struct trace_probe *pos; + struct trace_kprobe *tk; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); + if (!trace_kprobe_is_registered(tk)) + continue; + if (trace_kprobe_is_return(tk)) + disable_kretprobe(&tk->rp); + else + disable_kprobe(&tk->rp.kp); + } +} + /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
*/ -static int -enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) +static int enable_trace_kprobe(struct trace_event_call *call, + struct trace_event_file *file) { - bool enabled = trace_probe_is_enabled(&tk->tp); + struct trace_probe *pos, *tp; + struct trace_kprobe *tk; + bool enabled; int ret = 0; + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This also changes "enabled" state */ if (file) { - ret = trace_probe_add_file(&tk->tp, file); + ret = trace_probe_add_file(tp, file); if (ret) return ret; } else - trace_probe_set_flag(&tk->tp, TP_FLAG_PROFILE); + trace_probe_set_flag(tp, TP_FLAG_PROFILE); if (enabled) return 0; - ret = __enable_trace_kprobe(tk); - if (ret) { + enabled = false; + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); + if (trace_kprobe_has_gone(tk)) + continue; + ret = __enable_trace_kprobe(tk); + if (ret) { + if (enabled) { + __disable_trace_kprobe(tp); + enabled = false; + } + break; + } + enabled = true; + } + + if (!enabled) { + /* No probe is enabled. Roll back */ if (file) - trace_probe_remove_file(&tk->tp, file); + trace_probe_remove_file(tp, file); else - trace_probe_clear_flag(&tk->tp, TP_FLAG_PROFILE); + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); + if (!ret) + /* Since all probes are gone, this is not available */ + ret = -EADDRNOTAVAIL; } return ret; @@ -326,11 +382,14 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * Disable trace_probe * if the file is NULL, disable "perf" handler, or disable "trace" handler. 
*/ -static int -disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) +static int disable_trace_kprobe(struct trace_event_call *call, + struct trace_event_file *file) { - struct trace_probe *tp = &tk->tp; - int ret = 0; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; if (file) { if (!trace_probe_get_file_link(tp, file)) @@ -341,12 +400,8 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) } else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - if (!trace_probe_is_enabled(tp) && trace_kprobe_is_registered(tk)) { - if (trace_kprobe_is_return(tk)) - disable_kretprobe(&tk->rp); - else - disable_kprobe(&tk->rp.kp); - } + if (!trace_probe_is_enabled(tp)) + __disable_trace_kprobe(tp); out: if (file) @@ -358,7 +413,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) */ trace_probe_remove_file(tp, file); - return ret; + return 0; } #if defined(CONFIG_KPROBES_ON_FTRACE) && \ @@ -1089,7 +1144,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags, struct trace_probe *tp; field = (struct kprobe_trace_entry_head *)iter->ent; - tp = container_of(event, struct trace_probe, call.event); + tp = trace_probe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (WARN_ON_ONCE(!tp)) + goto out; trace_seq_printf(s, "%s: (", trace_probe_name(tp)); @@ -1116,7 +1174,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, struct trace_probe *tp; field = (struct kretprobe_trace_entry_head *)iter->ent; - tp = container_of(event, struct trace_probe, call.event); + tp = trace_probe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (WARN_ON_ONCE(!tp)) + goto out; trace_seq_printf(s, "%s: (", trace_probe_name(tp)); @@ -1145,23 +1206,31 @@ static int kprobe_event_define_fields(struct trace_event_call *event_call) { int ret; struct kprobe_trace_entry_head field; - struct 
trace_kprobe *tk = (struct trace_kprobe *)event_call->data; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(event_call); + if (WARN_ON_ONCE(!tp)) + return -ENOENT; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } static int kretprobe_event_define_fields(struct trace_event_call *event_call) { int ret; struct kretprobe_trace_entry_head field; - struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(event_call); + if (WARN_ON_ONCE(!tp)) + return -ENOENT; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } #ifdef CONFIG_PERF_EVENTS @@ -1289,20 +1358,19 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, static int kprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct trace_kprobe *tk = (struct trace_kprobe *)event->data; struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return enable_trace_kprobe(tk, file); + return enable_trace_kprobe(event, file); case TRACE_REG_UNREGISTER: - return disable_trace_kprobe(tk, file); + return disable_trace_kprobe(event, file); #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return enable_trace_kprobe(tk, NULL); + return enable_trace_kprobe(event, NULL); case TRACE_REG_PERF_UNREGISTER: - return disable_trace_kprobe(tk, NULL); + return disable_trace_kprobe(event, NULL); case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: @@ -1369,7 +1437,6 @@ static inline void init_trace_event_call(struct trace_kprobe *tk) call->flags = TRACE_EVENT_FL_KPROBE; call->class->reg = 
kprobe_register; - call->data = tk; } static int register_kprobe_event(struct trace_kprobe *tk) @@ -1432,7 +1499,9 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) { struct trace_kprobe *tk; - tk = container_of(event_call, struct trace_kprobe, tp.call); + tk = trace_kprobe_primary_from_call(event_call); + if (unlikely(!tk)) + return; if (trace_probe_is_enabled(&tk->tp)) { WARN_ON(1); @@ -1577,7 +1646,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_kprobe(tk, file); + enable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } } @@ -1598,7 +1668,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_kprobe(tk, file); + enable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } } @@ -1631,7 +1702,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_kprobe(tk, file); + disable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); @@ -1649,7 +1721,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_kprobe(tk, file); + disable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index fb6bfbc5bf86..28733bd6b607 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -889,41 +889,59 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, void trace_probe_cleanup(struct trace_probe *tp) { - struct trace_event_call *call = trace_probe_event_call(tp); int i; for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - if (call->class) - kfree(call->class->system); - 
kfree(call->name); - kfree(call->print_fmt); + if (tp->event) { + struct trace_event_call *call = trace_probe_event_call(tp); + + kfree(tp->event->class.system); + kfree(call->name); + kfree(call->print_fmt); + kfree(tp->event); + tp->event = NULL; + } } int trace_probe_init(struct trace_probe *tp, const char *event, const char *group) { - struct trace_event_call *call = trace_probe_event_call(tp); + struct trace_event_call *call; + int ret = 0; if (!event || !group) return -EINVAL; - call->class = &tp->class; - call->name = kstrdup(event, GFP_KERNEL); - if (!call->name) + tp->event = kzalloc(sizeof(struct trace_probe_event), GFP_KERNEL); + if (!tp->event) return -ENOMEM; - tp->class.system = kstrdup(group, GFP_KERNEL); - if (!tp->class.system) { - kfree(call->name); - call->name = NULL; - return -ENOMEM; + call = trace_probe_event_call(tp); + call->class = &tp->event->class; + call->name = kstrdup(event, GFP_KERNEL); + if (!call->name) { + ret = -ENOMEM; + goto error; } - INIT_LIST_HEAD(&tp->files); - INIT_LIST_HEAD(&tp->class.fields); + + tp->event->class.system = kstrdup(group, GFP_KERNEL); + if (!tp->event->class.system) { + ret = -ENOMEM; + goto error; + } + INIT_LIST_HEAD(&tp->event->files); + INIT_LIST_HEAD(&tp->event->class.fields); + INIT_LIST_HEAD(&tp->event->probes); + INIT_LIST_HEAD(&tp->list); + list_add(&tp->event->probes, &tp->list); return 0; + +error: + trace_probe_cleanup(tp); + return ret; } int trace_probe_register_event_call(struct trace_probe *tp) @@ -952,7 +970,7 @@ int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file) link->file = file; INIT_LIST_HEAD(&link->list); - list_add_tail_rcu(&link->list, &tp->files); + list_add_tail_rcu(&link->list, &tp->event->files); trace_probe_set_flag(tp, TP_FLAG_TRACE); return 0; } @@ -983,7 +1001,7 @@ int trace_probe_remove_file(struct trace_probe *tp, synchronize_rcu(); kfree(link); - if (list_empty(&tp->files)) + if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, 
TP_FLAG_TRACE); return 0; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index d1714820efe1..0b84abb884c2 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -222,11 +222,18 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; -struct trace_probe { +/* Event call and class holder */ +struct trace_probe_event { unsigned int flags; /* For TP_FLAG_* */ struct trace_event_class class; struct trace_event_call call; struct list_head files; + struct list_head probes; +}; + +struct trace_probe { + struct list_head list; + struct trace_probe_event *event; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; @@ -240,19 +247,19 @@ struct event_file_link { static inline bool trace_probe_test_flag(struct trace_probe *tp, unsigned int flag) { - return !!(tp->flags & flag); + return !!(tp->event->flags & flag); } static inline void trace_probe_set_flag(struct trace_probe *tp, unsigned int flag) { - tp->flags |= flag; + tp->event->flags |= flag; } static inline void trace_probe_clear_flag(struct trace_probe *tp, unsigned int flag) { - tp->flags &= ~flag; + tp->event->flags &= ~flag; } static inline bool trace_probe_is_enabled(struct trace_probe *tp) @@ -262,29 +269,48 @@ static inline bool trace_probe_is_enabled(struct trace_probe *tp) static inline const char *trace_probe_name(struct trace_probe *tp) { - return trace_event_name(&tp->call); + return trace_event_name(&tp->event->call); } static inline const char *trace_probe_group_name(struct trace_probe *tp) { - return tp->call.class->system; + return tp->event->call.class->system; } static inline struct trace_event_call * trace_probe_event_call(struct trace_probe *tp) { - return &tp->call; + return &tp->event->call; +} + +static inline struct trace_probe_event * +trace_probe_event_from_call(struct trace_event_call *event_call) +{ + return container_of(event_call, struct trace_probe_event, call); +} + +static inline struct 
trace_probe * +trace_probe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe_event *tpe = trace_probe_event_from_call(call); + + return list_first_entry(&tpe->probes, struct trace_probe, list); +} + +static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp) +{ + return &tp->event->probes; } static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ - return trace_remove_event_call(&tp->call); + return trace_remove_event_call(&tp->event->call); } static inline bool trace_probe_has_single_file(struct trace_probe *tp) { - return !!list_is_singular(&tp->files); + return !!list_is_singular(&tp->event->files); } int trace_probe_init(struct trace_probe *tp, const char *event, @@ -298,9 +324,9 @@ struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); #define trace_probe_for_each_link(pos, tp) \ - list_for_each_entry(pos, &(tp)->files, list) + list_for_each_entry(pos, &(tp)->event->files, list) #define trace_probe_for_each_link_rcu(pos, tp) \ - list_for_each_entry_rcu(pos, &(tp)->files, list) + list_for_each_entry_rcu(pos, &(tp)->event->files, list) /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 1ceedb9146b1..ac799abb7da9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -293,6 +293,18 @@ static bool trace_uprobe_match(const char *system, const char *event, (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0); } +static nokprobe_inline struct trace_uprobe * +trace_uprobe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return NULL; + + return container_of(tp, struct trace_uprobe, tp); +} + /* * Allocate new trace_uprobe and 
initialize it (including uprobes). */ @@ -897,7 +909,10 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e u8 *data; entry = (struct uprobe_trace_entry_head *)iter->ent; - tu = container_of(event, struct trace_uprobe, tp.call.event); + tu = trace_uprobe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (unlikely(!tu)) + goto out; if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", @@ -924,27 +939,71 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); -static int -probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, - filter_func_t filter) +static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) { - bool enabled = trace_probe_is_enabled(&tu->tp); int ret; + tu->consumer.filter = filter; + tu->inode = d_real_inode(tu->path.dentry); + + if (tu->ref_ctr_offset) + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + else + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + + if (ret) + tu->inode = NULL; + + return ret; +} + +static void __probe_event_disable(struct trace_probe *tp) +{ + struct trace_probe *pos; + struct trace_uprobe *tu; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + if (!tu->inode) + continue; + + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); + + uprobe_unregister(tu->inode, tu->offset, &tu->consumer); + tu->inode = NULL; + } +} + +static int probe_event_enable(struct trace_event_call *call, + struct trace_event_file *file, filter_func_t filter) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + bool enabled; + int ret; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This may also change "enabled" state */ if (file) { - if 
(trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (trace_probe_test_flag(tp, TP_FLAG_PROFILE)) return -EINTR; - ret = trace_probe_add_file(&tu->tp, file); + ret = trace_probe_add_file(tp, file); if (ret < 0) return ret; } else { - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + if (trace_probe_test_flag(tp, TP_FLAG_TRACE)) return -EINTR; - trace_probe_set_flag(&tu->tp, TP_FLAG_PROFILE); + trace_probe_set_flag(tp, TP_FLAG_PROFILE); } + tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(&tu->filter)); if (enabled) @@ -954,18 +1013,15 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, if (ret) goto err_flags; - tu->consumer.filter = filter; - tu->inode = d_real_inode(tu->path.dentry); - if (tu->ref_ctr_offset) { - ret = uprobe_register_refctr(tu->inode, tu->offset, - tu->ref_ctr_offset, &tu->consumer); - } else { - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + ret = trace_uprobe_enable(tu, filter); + if (ret) { + __probe_event_disable(tp); + goto err_buffer; + } } - if (ret) - goto err_buffer; - return 0; err_buffer: @@ -973,33 +1029,35 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, err_flags: if (file) - trace_probe_remove_file(&tu->tp, file); + trace_probe_remove_file(tp, file); else - trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); return ret; } -static void -probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) +static void probe_event_disable(struct trace_event_call *call, + struct trace_event_file *file) { - if (!trace_probe_is_enabled(&tu->tp)) + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return; + + if (!trace_probe_is_enabled(tp)) return; if (file) { - if (trace_probe_remove_file(&tu->tp, file) < 0) + if 
(trace_probe_remove_file(tp, file) < 0) return; - if (trace_probe_is_enabled(&tu->tp)) + if (trace_probe_is_enabled(tp)) return; } else - trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); - - WARN_ON(!uprobe_filter_is_empty(&tu->filter)); - - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); - tu->inode = NULL; + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); + __probe_event_disable(tp); uprobe_buffer_disable(); } @@ -1007,7 +1065,11 @@ static int uprobe_event_define_fields(struct trace_event_call *event_call) { int ret, size; struct uprobe_trace_entry_head field; - struct trace_uprobe *tu = event_call->data; + struct trace_uprobe *tu; + + tu = trace_uprobe_primary_from_call(event_call); + if (unlikely(!tu)) + return -ENODEV; if (is_ret_probe(tu)) { DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); @@ -1100,6 +1162,27 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) return err; } +static int uprobe_perf_multi_call(struct trace_event_call *call, + struct perf_event *event, + int (*op)(struct trace_uprobe *tu, struct perf_event *event)) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + int ret = 0; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + ret = op(tu, event); + if (ret) + break; + } + + return ret; +} static bool uprobe_perf_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { @@ -1213,30 +1296,29 @@ static int trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct trace_uprobe *tu = event->data; struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return probe_event_enable(tu, file, NULL); + return probe_event_enable(event, file, NULL); case TRACE_REG_UNREGISTER: - probe_event_disable(tu, file); + probe_event_disable(event, file); return 
0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return probe_event_enable(tu, NULL, uprobe_perf_filter); + return probe_event_enable(event, NULL, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: - probe_event_disable(tu, NULL); + probe_event_disable(event, NULL); return 0; case TRACE_REG_PERF_OPEN: - return uprobe_perf_open(tu, data); + return uprobe_perf_multi_call(event, data, uprobe_perf_open); case TRACE_REG_PERF_CLOSE: - return uprobe_perf_close(tu, data); + return uprobe_perf_multi_call(event, data, uprobe_perf_close); #endif default: @@ -1330,7 +1412,6 @@ static inline void init_trace_event_call(struct trace_uprobe *tu) call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; - call->data = tu; } static int register_uprobe_event(struct trace_uprobe *tu) @@ -1399,7 +1480,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) { struct trace_uprobe *tu; - tu = container_of(event_call, struct trace_uprobe, tp.call); + tu = trace_uprobe_primary_from_call(event_call); free_trace_uprobe(tu); } From cb8e7a8d55e052fdcfd1a567305a9a180fb61c57 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:29 +0900 Subject: [PATCH 06/33] tracing/dynevent: Delete all matched events When user gives an event name to delete, delete all matched events instead of the first one. This means if there are several events which have same name but different group (subsystem) name, those are removed if user passed only the event name, e.g. 
# cat kprobe_events p:group1/testevent _do_fork p:group2/testevent fork_idle # echo -:testevent >> kprobe_events # cat kprobe_events # Link: http://lkml.kernel.org/r/156095684958.28024.16597826267117453638.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index fa100ed3b4de..1cc55c50c491 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -61,10 +61,12 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) for_each_dyn_event_safe(pos, n) { if (type && type != pos->ops) continue; - if (pos->ops->match(system, event, pos)) { - ret = pos->ops->free(pos); + if (!pos->ops->match(system, event, pos)) + continue; + + ret = pos->ops->free(pos); + if (ret) break; - } } mutex_unlock(&event_mutex); From 30199137c899d7e416a2adc58bf09bec217ce9ca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:39 +0900 Subject: [PATCH 07/33] tracing/dynevent: Pass extra arguments to match operation Pass extra arguments to match operation for checking exact match. If the event doesn't support exact match, it will be ignored. 
Link: http://lkml.kernel.org/r/156095685930.28024.10405547027475590975.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 4 +++- kernel/trace/trace_dynevent.h | 7 ++++--- kernel/trace/trace_events_hist.c | 4 ++-- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_uprobe.c | 4 ++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 1cc55c50c491..a41fed46c285 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -47,6 +47,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) return -EINVAL; event++; } + argc--; argv++; p = strchr(event, '/'); if (p) { @@ -61,7 +62,8 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) for_each_dyn_event_safe(pos, n) { if (type && type != pos->ops) continue; - if (!pos->ops->match(system, event, pos)) + if (!pos->ops->match(system, event, + argc, (const char **)argv, pos)) continue; ret = pos->ops->free(pos); diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 8c334064e4d6..46898138d2df 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -31,8 +31,9 @@ struct dyn_event; * @is_busy: Check whether given event is busy so that it can not be deleted. * Return true if it is busy, otherwides false. * @free: Delete the given event. Return 0 if success, otherwides error. - * @match: Check whether given event and system name match this event. - * Return true if it matches, otherwides false. + * @match: Check whether given event and system name match this event. The argc + * and argv is used for exact match. Return true if it matches, otherwides + * false. * * Except for @create, these methods are called under holding event_mutex. 
*/ @@ -43,7 +44,7 @@ struct dyn_event_operations { bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); bool (*match)(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); }; /* Register new dyn_event type -- must be called at first */ diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ca6b0dff60c5..65e7d071ed28 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -374,7 +374,7 @@ static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); static bool synth_event_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations synth_event_ops = { .create = synth_event_create, @@ -422,7 +422,7 @@ static bool synth_event_is_busy(struct dyn_event *ev) } static bool synth_event_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct synth_event *sev = to_synth_event(ev); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eac6344a2e7c..e8f72431b866 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -39,7 +39,7 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event *ev); static bool trace_kprobe_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_kprobe_ops = { .create = trace_kprobe_create, @@ -138,7 +138,7 @@ static bool trace_kprobe_is_busy(struct dyn_event *ev) } static bool trace_kprobe_match(const char *system, const char *event, - struct 
dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index ac799abb7da9..2862e6829e48 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -44,7 +44,7 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); static bool trace_uprobe_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_uprobe_ops = { .create = trace_uprobe_create, @@ -285,7 +285,7 @@ static bool trace_uprobe_is_busy(struct dyn_event *ev) } static bool trace_uprobe_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); From ca89bc071d5e4e981dcc52e0ca90f4500d332e42 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:49 +0900 Subject: [PATCH 08/33] tracing/kprobe: Add multi-probe per event support Add multi-probe per one event support to kprobe events. User can define several different probes on one trace event if those probes have the same "event signature", e.g. # echo p:testevent _do_fork > kprobe_events # echo p:testevent fork_idle >> kprobe_events # cat kprobe_events p:kprobes/testevent _do_fork p:kprobes/testevent fork_idle The event signature is defined by kprobe type (retprobe or not), the number of args, argument names, and argument types. Note that this only supports the append method. A delete event operation will delete all probes on the event.
Link: http://lkml.kernel.org/r/156095686913.28024.9357292202316540742.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 +-- kernel/trace/trace_kprobe.c | 52 ++++++++++++++++++++++++++++---- kernel/trace/trace_probe.c | 59 +++++++++++++++++++++++++++++++------ kernel/trace/trace_probe.h | 14 ++++++++- 4 files changed, 111 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 563e80f9006a..a8505d84b76e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4815,11 +4815,11 @@ static const char readme_msg[] = #endif #endif /* CONFIG_STACK_TRACER */ #ifdef CONFIG_DYNAMIC_EVENTS - " dynamic_events\t\t- Add/remove/show the generic dynamic events\n" + " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_KPROBE_EVENTS - " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" + " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e8f72431b866..f43098bf62dd 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -492,6 +492,10 @@ static void __unregister_trace_kprobe(struct trace_kprobe *tk) /* Unregister a trace_probe and probe_event */ static int unregister_trace_kprobe(struct trace_kprobe *tk) { + /* If other probes are on the event, just unregister kprobe */ + if (trace_probe_has_sibling(&tk->tp)) + goto unreg; + /* Enabled event can not be unregistered */ if (trace_probe_is_enabled(&tk->tp)) return -EBUSY; @@ -500,12 +504,38 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk) if (unregister_kprobe_event(tk)) return -EBUSY; +unreg: __unregister_trace_kprobe(tk); dyn_event_remove(&tk->devent); + 
trace_probe_unlink(&tk->tp); return 0; } +static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to) +{ + int ret; + + /* Append to existing event */ + ret = trace_probe_append(&tk->tp, &to->tp); + if (ret) + return ret; + + /* Register k*probe */ + ret = __register_trace_kprobe(tk); + if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { + pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); + ret = 0; + } + + if (ret) + trace_probe_unlink(&tk->tp); + else + dyn_event_add(&tk->devent); + + return ret; +} + /* Register a trace_probe and probe_event */ static int register_trace_kprobe(struct trace_kprobe *tk) { @@ -514,14 +544,24 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&event_mutex); - /* Delete old (same name) event if exist */ old_tk = find_trace_kprobe(trace_probe_name(&tk->tp), trace_probe_group_name(&tk->tp)); if (old_tk) { - ret = unregister_trace_kprobe(old_tk); - if (ret < 0) - goto end; - free_trace_kprobe(old_tk); + if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, DIFF_PROBE_TYPE); + ret = -EEXIST; + } else { + ret = trace_probe_compare_arg_type(&tk->tp, &old_tk->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + ret = -EEXIST; + } else + ret = append_trace_kprobe(tk, old_tk); + } + goto end; } /* Register new event */ @@ -755,7 +795,7 @@ static int trace_kprobe_create(int argc, const char *argv[]) trace_probe_log_err(0, BAD_INSN_BNDRY); else if (ret == -ENOENT) trace_probe_log_err(0, BAD_PROBE_ADDR); - else if (ret != -ENOMEM) + else if (ret != -ENOMEM && ret != -EEXIST) trace_probe_log_err(0, FAIL_REG_PROBE); goto error; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 28733bd6b607..651a1449acde 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c 
@@ -886,6 +886,35 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, return 0; } +static void trace_probe_event_free(struct trace_probe_event *tpe) +{ + kfree(tpe->class.system); + kfree(tpe->call.name); + kfree(tpe->call.print_fmt); + kfree(tpe); +} + +int trace_probe_append(struct trace_probe *tp, struct trace_probe *to) +{ + if (trace_probe_has_sibling(tp)) + return -EBUSY; + + list_del_init(&tp->list); + trace_probe_event_free(tp->event); + + tp->event = to->event; + list_add_tail(&tp->list, trace_probe_probe_list(to)); + + return 0; +} + +void trace_probe_unlink(struct trace_probe *tp) +{ + list_del_init(&tp->list); + if (list_empty(trace_probe_probe_list(tp))) + trace_probe_event_free(tp->event); + tp->event = NULL; +} void trace_probe_cleanup(struct trace_probe *tp) { @@ -894,15 +923,8 @@ void trace_probe_cleanup(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - if (tp->event) { - struct trace_event_call *call = trace_probe_event_call(tp); - - kfree(tp->event->class.system); - kfree(call->name); - kfree(call->print_fmt); - kfree(tp->event); - tp->event = NULL; - } + if (tp->event) + trace_probe_unlink(tp); } int trace_probe_init(struct trace_probe *tp, const char *event, @@ -1006,3 +1028,22 @@ int trace_probe_remove_file(struct trace_probe *tp, return 0; } + +/* + * Return the smallest index of different type argument (start from 1). + * If all argument types and name are same, return 0. 
+ */ +int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b) +{ + int i; + + for (i = 0; i < a->nr_args; i++) { + if ((b->nr_args <= i) || + ((a->args[i].type != b->args[i].type) || + (a->args[i].count != b->args[i].count) || + strcmp(a->args[i].name, b->args[i].name))) + return i + 1; + } + + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0b84abb884c2..39926e8a344b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -302,6 +302,13 @@ static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp) return &tp->event->probes; } +static inline bool trace_probe_has_sibling(struct trace_probe *tp) +{ + struct list_head *list = trace_probe_probe_list(tp); + + return !list_empty(list) && !list_is_singular(list); +} + static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ @@ -316,12 +323,15 @@ static inline bool trace_probe_has_single_file(struct trace_probe *tp) int trace_probe_init(struct trace_probe *tp, const char *event, const char *group); void trace_probe_cleanup(struct trace_probe *tp); +int trace_probe_append(struct trace_probe *tp, struct trace_probe *to); +void trace_probe_unlink(struct trace_probe *tp); int trace_probe_register_event_call(struct trace_probe *tp); int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file); int trace_probe_remove_file(struct trace_probe *tp, struct trace_event_file *file); struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); +int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) @@ -419,7 +429,9 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(ARG_TOO_LONG, "Argument expression is too long"), \ C(NO_ARG_BODY, "No 
argument expression"), \ C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ - C(FAIL_REG_PROBE, "Failed to register probe event"), + C(FAIL_REG_PROBE, "Failed to register probe event"),\ + C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ + C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"), #undef C #define C(a, b) TP_ERR_##a From 41af3cf587f476f9a879b08219324c8b456e6a4c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:58 +0900 Subject: [PATCH 09/33] tracing/uprobe: Add multi-probe per uprobe event support Allow user to define several probes on one uprobe event. Note that this only support appending method. So deleting event will delete all probes on the event. Link: http://lkml.kernel.org/r/156095687876.28024.13840331032234992863.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_uprobe.c | 60 ++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a8505d84b76e..c7797a81a37e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4823,7 +4823,7 @@ static const char readme_msg[] = "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_UPROBE_EVENTS - " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" + " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2862e6829e48..d84e09abb8de 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -364,15 +364,32 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) { int ret; + if (trace_probe_has_sibling(&tu->tp)) + goto unreg; + 
ret = unregister_uprobe_event(tu); if (ret) return ret; +unreg: dyn_event_remove(&tu->devent); + trace_probe_unlink(&tu->tp); free_trace_uprobe(tu); return 0; } +static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) +{ + int ret; + + /* Append to existing event */ + ret = trace_probe_append(&tu->tp, &to->tp); + if (!ret) + dyn_event_add(&tu->devent); + + return ret; +} + /* * Uprobe with multiple reference counter is not allowed. i.e. * If inode and offset matches, reference counter offset *must* @@ -382,25 +399,21 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) * as the new one does not conflict with any other existing * ones. */ -static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new) +static int validate_ref_ctr_offset(struct trace_uprobe *new) { struct dyn_event *pos; - struct trace_uprobe *tmp, *old = NULL; + struct trace_uprobe *tmp; struct inode *new_inode = d_real_inode(new->path.dentry); - old = find_probe_event(trace_probe_name(&new->tp), - trace_probe_group_name(&new->tp)); - for_each_trace_uprobe(tmp, pos) { - if ((old ? 
old != tmp : true) && - new_inode == d_real_inode(tmp->path.dentry) && + if (new_inode == d_real_inode(tmp->path.dentry) && new->offset == tmp->offset && new->ref_ctr_offset != tmp->ref_ctr_offset) { pr_warn("Reference counter offset mismatch."); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - return old; + return 0; } /* Register a trace_uprobe and probe_event */ @@ -411,18 +424,29 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&event_mutex); - /* register as an event */ - old_tu = find_old_trace_uprobe(tu); - if (IS_ERR(old_tu)) { - ret = PTR_ERR(old_tu); + ret = validate_ref_ctr_offset(tu); + if (ret) goto end; - } + /* register as an event */ + old_tu = find_probe_event(trace_probe_name(&tu->tp), + trace_probe_group_name(&tu->tp)); if (old_tu) { - /* delete old event */ - ret = unregister_trace_uprobe(old_tu); - if (ret) - goto end; + if (is_ret_probe(tu) != is_ret_probe(old_tu)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, DIFF_PROBE_TYPE); + ret = -EEXIST; + } else { + ret = trace_probe_compare_arg_type(&tu->tp, &old_tu->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + ret = -EEXIST; + } else + ret = append_trace_uprobe(tu, old_tu); + } + goto end; } ret = register_uprobe_event(tu); From eb5bf81330a722d0079d28eed13d3a9355d938bf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:08 +0900 Subject: [PATCH 10/33] tracing/kprobe: Add per-probe delete from event Allow user to delete a probe from event. This is done by head match. For example, if we have 2 probes on an event $ cat kprobe_events p:kprobes/testprobe _do_fork r1=%ax r2=%dx p:kprobes/testprobe idle_fork r1=%ax r2=%cx Then you can remove one of them by passing the head of definition which identify the probe. 
$ echo "-:kprobes/testprobe idle_fork" >> kprobe_events Link: http://lkml.kernel.org/r/156095688848.28024.15798690082378432435.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 25 ++++++++++++++++++++++++- kernel/trace/trace_probe.c | 18 ++++++++++++++++++ kernel/trace/trace_probe.h | 2 ++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f43098bf62dd..18c4175b6585 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -137,13 +137,36 @@ static bool trace_kprobe_is_busy(struct dyn_event *ev) return trace_probe_is_enabled(&tk->tp); } +static bool trace_kprobe_match_command_head(struct trace_kprobe *tk, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + + if (!argc) + return true; + + if (!tk->symbol) + snprintf(buf, sizeof(buf), "0x%p", tk->rp.kp.addr); + else if (tk->rp.kp.offset) + snprintf(buf, sizeof(buf), "%s+%u", + trace_kprobe_symbol(tk), tk->rp.kp.offset); + else + snprintf(buf, sizeof(buf), "%s", trace_kprobe_symbol(tk)); + if (strcmp(buf, argv[0])) + return false; + argc--; argv++; + + return trace_probe_match_command_args(&tk->tp, argc, argv); +} + static bool trace_kprobe_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); return strcmp(trace_probe_name(&tk->tp), event) == 0 && - (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0); + (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) && + trace_kprobe_match_command_head(tk, argc, argv); } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 651a1449acde..f8c3c65c035d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1047,3 +1047,21 @@ int trace_probe_compare_arg_type(struct 
trace_probe *a, struct trace_probe *b) return 0; } + +bool trace_probe_match_command_args(struct trace_probe *tp, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + int i; + + if (tp->nr_args < argc) + return false; + + for (i = 0; i < argc; i++) { + snprintf(buf, sizeof(buf), "%s=%s", + tp->args[i].name, tp->args[i].comm); + if (strcmp(buf, argv[i])) + return false; + } + return true; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 39926e8a344b..2dcc4e317787 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -332,6 +332,8 @@ int trace_probe_remove_file(struct trace_probe *tp, struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); +bool trace_probe_match_command_args(struct trace_probe *tp, + int argc, const char **argv); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) From ab10d69eb714961d1eca4129e4f8cda5e0618f66 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:18 +0900 Subject: [PATCH 11/33] tracing/uprobe: Add per-probe delete from event Add per-probe delete method from one event passing the head of definition. In other words, the events which match the head N parameters are deleted. 
Link: http://lkml.kernel.org/r/156095689811.28024.221706761151739433.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d84e09abb8de..84925b5b6db5 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -284,13 +284,42 @@ static bool trace_uprobe_is_busy(struct dyn_event *ev) return trace_probe_is_enabled(&tu->tp); } +static bool trace_uprobe_match_command_head(struct trace_uprobe *tu, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + int len; + + if (!argc) + return true; + + len = strlen(tu->filename); + if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':') + return false; + + if (tu->ref_ctr_offset == 0) + snprintf(buf, sizeof(buf), "0x%0*lx", + (int)(sizeof(void *) * 2), tu->offset); + else + snprintf(buf, sizeof(buf), "0x%0*lx(0x%lx)", + (int)(sizeof(void *) * 2), tu->offset, + tu->ref_ctr_offset); + if (strcmp(buf, &argv[0][len + 1])) + return false; + + argc--; argv++; + + return trace_probe_match_command_args(&tu->tp, argc, argv); +} + static bool trace_uprobe_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); return strcmp(trace_probe_name(&tu->tp), event) == 0 && - (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0); + (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) && + trace_uprobe_match_command_head(tu, argc, argv); } static nokprobe_inline struct trace_uprobe * From 6218bf9f4d2942e88d97b60abc8c2ca0532e41a8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:27 +0900 Subject: [PATCH 12/33] tracing/probe: Add immediate parameter support Add immediate value parameter (\1234) support to probe events. 
This allows you to specify an immediate (or dummy) parameter instead of fetching from memory or register. This feature looks odd, but imagine that you put a probe on some code to trace some data. Suppose the code is compiled into 2 instructions, and one instruction has the value but the other has nothing because it was optimized out. In that case, you can not fold those into one event, even though ftrace supports multiple probes on one event. With this feature, you can set a dummy value like foo=\0xdeadbeef instead of something like foo=%di. Link: http://lkml.kernel.org/r/156095690733.28024.13258186548822649469.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 1 + Documentation/trace/uprobetracer.rst | 1 + kernel/trace/trace.c | 2 +- kernel/trace/trace_probe.c | 18 ++++++++++++++++++ kernel/trace/trace_probe.h | 1 + 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index fbb314bfa112..55993055902c 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -52,6 +52,7 @@ Synopsis of kprobe_events $retval : Fetch return value.(\*2) $comm : Fetch current task comm. +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4) + \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst index 6e75a6c5a2c8..98cde99939d7 100644 --- a/Documentation/trace/uprobetracer.rst +++ b/Documentation/trace/uprobetracer.rst @@ -45,6 +45,7 @@ Synopsis of uprobe_tracer $retval : Fetch return value.(\*1) $comm : Fetch current task comm.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*2)(\*3) + \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c7797a81a37e..fb4003c10151 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4848,7 +4848,7 @@ static const char readme_msg[] = #else "\t $stack, $stack, $retval, $comm,\n" #endif - "\t +|-[u]()\n" + "\t +|-[u](), \\imm-value\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/, ustring,\n" "\t \\[\\]\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index f8c3c65c035d..fb90baec3cd8 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -316,6 +316,17 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, return -EINVAL; } +static int str_to_immediate(char *str, unsigned long *imm) +{ + if (isdigit(str[0])) + return kstrtoul(str, 0, imm); + else if (str[0] == '-') + return kstrtol(str, 0, (long *)imm); + else if (str[0] == '+') + return kstrtol(str + 1, 0, (long *)imm); + return -EINVAL; +} + /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, @@ -444,6 +455,13 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->offset = offset; } break; + case '\\': /* Immediate value */ + ret = str_to_immediate(arg + 1, &code->immediate); + if (ret) + trace_probe_log_err(offs + 1, BAD_IMM); + else + code->op = FETCH_OP_IMM; + break; } if (!ret && code->op == FETCH_OP_NOP) { /* Parsed, but do not find fetch method */ diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2dcc4e317787..cc113b82a4ce 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -408,6 +408,7 @@ extern int traceprobe_define_arg_fields(struct 
trace_event_call *event_call, C(BAD_VAR, "Invalid $-valiable specified"), \ C(BAD_REG_NAME, "Invalid register name"), \ C(BAD_MEM_ADDR, "Invalid memory address"), \ + C(BAD_IMM, "Invalid immediate value"), \ C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \ C(BAD_FILE_OFFS, "Invalid file offset value"), \ C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ From a42e3c4de9642d5de524a0a48a7ce96872662dca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:37 +0900 Subject: [PATCH 13/33] tracing/probe: Add immediate string parameter support Add immediate string parameter (\"string") support to probe events. This allows you to specify an immediate (or dummy) parameter instead of fetching a string from memory. This feature looks odd, but imagine that you put a probe on some code to trace some string data. Suppose the code is compiled into 2 instructions, and one instruction has the string in memory but the other has no string because it was optimized out. In that case, you cannot fold those into one event, even if ftrace supported multiple probes on one event. With this feature, you can set a dummy string like foo=\"(optimized)":string instead of something like foo=+0(+0(%bp)):string. 
Link: http://lkml.kernel.org/r/156095691687.28024.13372712423865047991.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_kprobe.c | 3 ++ kernel/trace/trace_probe.c | 56 +++++++++++++++++++++++++++---------- kernel/trace/trace_probe.h | 2 ++ kernel/trace/trace_uprobe.c | 3 ++ 5 files changed, 51 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fb4003c10151..3916b72de715 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4848,7 +4848,7 @@ static const char readme_msg[] = #else "\t $stack, $stack, $retval, $comm,\n" #endif - "\t +|-[u](), \\imm-value\n" + "\t +|-[u](), \\imm-value, \\\"imm-string\"\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/, ustring,\n" "\t \\[\\]\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 18c4175b6585..7579c53bb053 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1083,6 +1083,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_COMM: val = (unsigned long)current->comm; break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index fb90baec3cd8..1e67fef06e53 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -327,6 +327,18 @@ static int str_to_immediate(char *str, unsigned long *imm) return -EINVAL; } +static int __parse_imm_string(char *str, char **pbuf, int offs) +{ + size_t len = strlen(str); + + if (str[len - 1] != '"') { + trace_probe_log_err(offs + len, IMMSTR_NO_CLOSE); + return -EINVAL; + } + *pbuf = kstrndup(str, len - 1, GFP_KERNEL); + return 0; +} + /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct 
fetch_type *type, @@ -441,7 +453,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type, ret = parse_probe_arg(arg, t2, &code, end, flags, offs); if (ret) break; - if (code->op == FETCH_OP_COMM) { + if (code->op == FETCH_OP_COMM || + code->op == FETCH_OP_DATA) { trace_probe_log_err(offs, COMM_CANT_DEREF); return -EINVAL; } @@ -456,11 +469,19 @@ parse_probe_arg(char *arg, const struct fetch_type *type, } break; case '\\': /* Immediate value */ - ret = str_to_immediate(arg + 1, &code->immediate); - if (ret) - trace_probe_log_err(offs + 1, BAD_IMM); - else - code->op = FETCH_OP_IMM; + if (arg[1] == '"') { /* Immediate string */ + ret = __parse_imm_string(arg + 2, &tmp, offs + 2); + if (ret) + break; + code->op = FETCH_OP_DATA; + code->data = tmp; + } else { + ret = str_to_immediate(arg + 1, &code->immediate); + if (ret) + trace_probe_log_err(offs + 1, BAD_IMM); + else + code->op = FETCH_OP_IMM; + } break; } if (!ret && code->op == FETCH_OP_NOP) { @@ -560,8 +581,11 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } } - /* Since $comm can not be dereferred, we can find $comm by strcmp */ - if (strcmp(arg, "$comm") == 0) { + /* + * Since $comm and immediate string can not be dereferred, + * we can find those by strcmp. + */ + if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) { /* The type of $comm must be "string", and not an array. */ if (parg->count || (t && strcmp(t, "string"))) return -EINVAL; @@ -598,7 +622,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if (!strcmp(parg->type->name, "string") || !strcmp(parg->type->name, "ustring")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF && - code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { + code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM && + code->op != FETCH_OP_DATA) { trace_probe_log_err(offset + (t ? 
(t - arg) : 0), BAD_STRING); ret = -EINVAL; @@ -607,9 +632,10 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) || parg->count) { /* - * IMM and COMM is pointing actual address, those must - * be kept, and if parg->count != 0, this is an array - * of string pointers instead of string address itself. + * IMM, DATA and COMM is pointing actual address, those + * must be kept, and if parg->count != 0, this is an + * array of string pointers instead of string address + * itself. */ code++; if (code->op != FETCH_OP_NOP) { @@ -683,7 +709,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, fail: if (ret) { for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) - if (code->op == FETCH_NOP_SYMBOL) + if (code->op == FETCH_NOP_SYMBOL || + code->op == FETCH_OP_DATA) kfree(code->data); } kfree(tmp); @@ -754,7 +781,8 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) struct fetch_insn *code = arg->code; while (code && code->op != FETCH_OP_END) { - if (code->op == FETCH_NOP_SYMBOL) + if (code->op == FETCH_NOP_SYMBOL || + code->op == FETCH_OP_DATA) kfree(code->data); code++; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index cc113b82a4ce..f805cc4cbe7c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -89,6 +89,7 @@ enum fetch_op { FETCH_OP_COMM, /* Current comm */ FETCH_OP_ARG, /* Function argument : .param */ FETCH_OP_FOFFS, /* File offset: .immediate */ + FETCH_OP_DATA, /* Allocated data: .data */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ FETCH_OP_UDEREF, /* User-space Dereference: .offset */ @@ -409,6 +410,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_REG_NAME, "Invalid register name"), \ C(BAD_MEM_ADDR, "Invalid memory address"), \ C(BAD_IMM, "Invalid immediate value"), \ + C(IMMSTR_NO_CLOSE, "String is not closed with '\"'"), \ C(FILE_ON_KPROBE, 
"File offset is not available with kprobe"), \ C(BAD_FILE_OFFS, "Invalid file offset value"), \ C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 84925b5b6db5..cbf4da4bf367 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -248,6 +248,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_COMM: val = FETCH_TOKEN_COMM; break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; From cf881485eb2af78c541c6f8ec71dfc7e0cb0e997 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:46 +0900 Subject: [PATCH 14/33] selftests/ftrace: Add a testcase for kprobe multiprobe event Add a testcase for kprobe event with multi-probe. Link: http://lkml.kernel.org/r/156095692637.28024.17188971794698768977.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/kprobe/kprobe_multiprobe.tc | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc new file mode 100644 index 000000000000..44494bac86d1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -0,0 +1,35 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Create/delete multiprobe on kprobe event + +[ -f kprobe_events ] || exit_unsupported + +grep -q "Create/append/" README || exit_unsupported + +# Choose 2 symbols for target +SYM1=_do_fork +SYM2=do_exit +EVENT_NAME=kprobes/testevent + +DEF1="p:$EVENT_NAME $SYM1" +DEF2="p:$EVENT_NAME $SYM2" + +:;: "Define an event which has 2 probes" ;: +echo $DEF1 >> kprobe_events +echo $DEF2 >> kprobe_events 
+cat kprobe_events | grep "$DEF1" +cat kprobe_events | grep "$DEF2" + +:;: "Remove the event by name (should remove both)" ;: +echo "-:$EVENT_NAME" >> kprobe_events +test `cat kprobe_events | wc -l` -eq 0 + +:;: "Remove just 1 event" ;: +echo $DEF1 >> kprobe_events +echo $DEF2 >> kprobe_events +echo "-:$EVENT_NAME $SYM1" >> kprobe_events +! cat kprobe_events | grep "$DEF1" +cat kprobe_events | grep "$DEF2" + +:;: "Appending different type must fail" ;: +! echo "$DEF1 \$stack" >> kprobe_events From 3e662c54a15c4c4409c45e6133107a3107796a96 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:55 +0900 Subject: [PATCH 15/33] selftests/ftrace: Add syntax error test for immediates Add syntax error test cases for immediate value and immediate string. Link: http://lkml.kernel.org/r/156095693553.28024.7730929892585591691.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 29faaec942c6..aa59944bcace 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -41,6 +41,11 @@ check_error 'p vfs_read ^%none_reg' # BAD_REG_NAME check_error 'p vfs_read ^@12345678abcde' # BAD_MEM_ADDR check_error 'p vfs_read ^@+10' # FILE_ON_KPROBE +grep -q "imm-value" README && \ +check_error 'p vfs_read arg1=\^x' # BAD_IMM +grep -q "imm-string" README && \ +check_error 'p vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE + check_error 'p vfs_read ^+0@0)' # DEREF_NEED_BRACE check_error 'p vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS check_error 'p vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE From 7f5291da4b15ad0ebc0965af41a1a2e3f04c3f08 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 
2019 00:09:05 +0900 Subject: [PATCH 16/33] selftests/ftrace: Add syntax error test for multiprobe Add syntax error test cases for multiprobe appending errors. Link: http://lkml.kernel.org/r/156095694541.28024.11918630805148623119.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/functions | 2 +- .../ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 1d96c5f7e402..86986c4bba54 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -115,7 +115,7 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file command=$(echo "$2" | tr -d ^) echo "Test command: $command" echo > error_log - (! echo "$command" > "$3" ) 2> /dev/null + (! echo "$command" >> "$3" ) 2> /dev/null grep "$1: error:" -A 3 error_log N=$(tail -n 1 error_log | wc -c) # " Command: " and "^\n" => 13 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index aa59944bcace..39ef7ac1f51c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -87,4 +87,14 @@ case $(uname -m) in ;; esac +# multiprobe errors +if grep -q "Create/append/" README && grep -q "imm-value" README; then +echo 'p:kprobes/testevent _do_fork' > kprobe_events +check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE +echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events +check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE +fi + exit 0 From 
3f1df12019f333442b12c3b5d110b8fc43eb0b36 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 31 Jul 2019 11:24:12 -0700 Subject: [PATCH 17/33] recordmcount: Rewrite error/success handling Recordmcount uses setjmp/longjmp to manage control flow as it reads and then writes the ELF file. This unusual control flow is hard to follow and check in addition to being unlike kernel coding style. So we rewrite these paths to use regular return values to indicate error/success. When an error or previously-completed object file is found we return an error code following kernel coding conventions -- negative error values and 0 for success when we're not returning a pointer. We return NULL for those that fail and return non-NULL pointers otherwise. One oddity is already_has_rel_mcount -- there we use pointer comparison rather than string comparison to differentiate between previously-processed object files and returning the name of a text section. Link: http://lkml.kernel.org/r/8ba8633d4afe444931f363c8d924bf9565b89a86.1564596289.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 162 +++++++++++++++++++++-------------------- scripts/recordmcount.h | 141 ++++++++++++++++++++++++----------- 2 files changed, 184 insertions(+), 119 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 1fe5fba99959..c6d395b8ff29 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -43,7 +42,6 @@ static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static char gpfx; /* prefix for global symbol name (sometimes '_') */ static struct stat sb; /* Remember .st_size, etc. 
*/ -static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */ static const char *altmcount; /* alternate mcount symbol name */ static int warn_on_notrace_sect; /* warn when section has mcount not being recorded */ static void *file_map; /* pointer of the mapped file */ @@ -53,13 +51,6 @@ static void *file_ptr; /* current file pointer location */ static void *file_append; /* added to the end of the file */ static size_t file_append_size; /* how much is added to end of file */ -/* setjmp() return values */ -enum { - SJ_SETJMP = 0, /* hardwired first return */ - SJ_FAIL, - SJ_SUCCEED -}; - /* Per-file resource cleanup when multiple files. */ static void cleanup(void) @@ -75,20 +66,6 @@ cleanup(void) file_updated = 0; } -static void __attribute__((noreturn)) -fail_file(void) -{ - cleanup(); - longjmp(jmpenv, SJ_FAIL); -} - -static void __attribute__((noreturn)) -succeed_file(void) -{ - cleanup(); - longjmp(jmpenv, SJ_SUCCEED); -} - /* ulseek, uwrite, ...: Check return value for errors. 
*/ static off_t @@ -107,12 +84,12 @@ ulseek(off_t const offset, int const whence) } if (file_ptr < file_map) { fprintf(stderr, "lseek: seek before file\n"); - fail_file(); + return -1; } return file_ptr - file_map; } -static size_t +static ssize_t uwrite(void const *const buf, size_t const count) { size_t cnt = count; @@ -129,7 +106,8 @@ uwrite(void const *const buf, size_t const count) } if (!file_append) { perror("write"); - fail_file(); + cleanup(); + return -1; } if (file_ptr < file_end) { cnt = file_end - file_ptr; @@ -155,7 +133,8 @@ umalloc(size_t size) void *const addr = malloc(size); if (addr == 0) { fprintf(stderr, "malloc failed: %zu bytes\n", size); - fail_file(); + cleanup(); + return NULL; } return addr; } @@ -183,8 +162,10 @@ static int make_nop_x86(void *map, size_t const offset) return -1; /* convert to nop */ - ulseek(offset - 1, SEEK_SET); - uwrite(ideal_nop, 5); + if (ulseek(offset - 1, SEEK_SET) < 0) + return -1; + if (uwrite(ideal_nop, 5) < 0) + return -1; return 0; } @@ -232,10 +213,12 @@ static int make_nop_arm(void *map, size_t const offset) return -1; /* Convert to nop */ - ulseek(off, SEEK_SET); + if (ulseek(off, SEEK_SET) < 0) + return -1; do { - uwrite(ideal_nop, nop_size); + if (uwrite(ideal_nop, nop_size) < 0) + return -1; } while (--cnt > 0); return 0; @@ -252,8 +235,10 @@ static int make_nop_arm64(void *map, size_t const offset) return -1; /* Convert to nop */ - ulseek(offset, SEEK_SET); - uwrite(ideal_nop, 4); + if (ulseek(offset, SEEK_SET) < 0) + return -1; + if (uwrite(ideal_nop, 4) < 0) + return -1; return 0; } @@ -272,14 +257,23 @@ static int make_nop_arm64(void *map, size_t const offset) */ static void *mmap_file(char const *fname) { + file_map = NULL; + sb.st_size = 0; fd_map = open(fname, O_RDONLY); - if (fd_map < 0 || fstat(fd_map, &sb) < 0) { + if (fd_map < 0) { perror(fname); - fail_file(); + cleanup(); + return NULL; + } + if (fstat(fd_map, &sb) < 0) { + perror(fname); + cleanup(); + goto out; } if (!S_ISREG(sb.st_mode)) 
{ fprintf(stderr, "not a regular file: %s\n", fname); - fail_file(); + cleanup(); + goto out; } file_map = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd_map, 0); @@ -287,11 +281,18 @@ static void *mmap_file(char const *fname) if (file_map == MAP_FAILED) { mmap_failed = 1; file_map = umalloc(sb.st_size); + if (!file_map) { + perror(fname); + goto out; + } if (read(fd_map, file_map, sb.st_size) != sb.st_size) { perror(fname); - fail_file(); + free(file_map); + file_map = NULL; + goto out; } } +out: close(fd_map); file_end = file_map + sb.st_size; @@ -299,13 +300,13 @@ static void *mmap_file(char const *fname) return file_map; } -static void write_file(const char *fname) +static int write_file(const char *fname) { char tmp_file[strlen(fname) + 4]; size_t n; if (!file_updated) - return; + return 0; sprintf(tmp_file, "%s.rc", fname); @@ -317,25 +318,32 @@ static void write_file(const char *fname) fd_map = open(tmp_file, O_WRONLY | O_TRUNC | O_CREAT, sb.st_mode); if (fd_map < 0) { perror(fname); - fail_file(); + cleanup(); + return -1; } n = write(fd_map, file_map, sb.st_size); if (n != sb.st_size) { perror("write"); - fail_file(); + cleanup(); + close(fd_map); + return -1; } if (file_append_size) { n = write(fd_map, file_append, file_append_size); if (n != file_append_size) { perror("write"); - fail_file(); + cleanup(); + close(fd_map); + return -1; } } close(fd_map); if (rename(tmp_file, fname) < 0) { perror(fname); - fail_file(); + cleanup(); + return -1; } + return 0; } /* w8rev, w8nat, ...: Handle endianness. */ @@ -400,6 +408,8 @@ is_mcounted_section_name(char const *const txtname) strcmp(".cpuidle.text", txtname) == 0; } +static char const *already_has_rel_mcount = "success"; /* our work here is done! 
*/ + /* 32 bit and 64 bit are very similar */ #include "recordmcount.h" #define RECORD_MCOUNT_64 @@ -438,11 +448,15 @@ static void MIPS64_r_info(Elf64_Rel *const rp, unsigned sym, unsigned type) }).r_info; } -static void +static int do_file(char const *const fname) { Elf32_Ehdr *const ehdr = mmap_file(fname); unsigned int reltype = 0; + int rc = -1; + + if (!ehdr) + goto out; w = w4nat; w2 = w2nat; @@ -452,8 +466,8 @@ do_file(char const *const fname) default: fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", ehdr->e_ident[EI_DATA], fname); - fail_file(); - break; + cleanup(); + goto out; case ELFDATA2LSB: if (*(unsigned char const *)&endian != 1) { /* main() is big endian, file.o is little endian. */ @@ -485,7 +499,8 @@ do_file(char const *const fname) || w2(ehdr->e_type) != ET_REL || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { fprintf(stderr, "unrecognized ET_REL file %s\n", fname); - fail_file(); + cleanup(); + goto out; } gpfx = 0; @@ -493,8 +508,8 @@ do_file(char const *const fname) default: fprintf(stderr, "unrecognized e_machine %u %s\n", w2(ehdr->e_machine), fname); - fail_file(); - break; + cleanup(); + goto out; case EM_386: reltype = R_386_32; rel_type_nop = R_386_NONE; @@ -534,20 +549,22 @@ do_file(char const *const fname) default: fprintf(stderr, "unrecognized ELF class %d %s\n", ehdr->e_ident[EI_CLASS], fname); - fail_file(); - break; + cleanup(); + goto out; case ELFCLASS32: if (w2(ehdr->e_ehsize) != sizeof(Elf32_Ehdr) || w2(ehdr->e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); - fail_file(); + cleanup(); + goto out; } if (w2(ehdr->e_machine) == EM_MIPS) { reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } - do32(ehdr, fname, reltype); + if (do32(ehdr, fname, reltype) < 0) + goto out; break; case ELFCLASS64: { Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr; @@ -555,7 +572,8 @@ do_file(char const *const fname) || w2(ghdr->e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, 
"unrecognized ET_REL file: %s\n", fname); - fail_file(); + cleanup(); + goto out; } if (w2(ghdr->e_machine) == EM_S390) { reltype = R_390_64; @@ -567,13 +585,16 @@ do_file(char const *const fname) Elf64_r_info = MIPS64_r_info; is_fake_mcount64 = MIPS64_is_fake_mcount; } - do64(ghdr, fname, reltype); + if (do64(ghdr, fname, reltype) < 0) + goto out; break; } } /* end switch */ - write_file(fname); + rc = write_file(fname); +out: cleanup(); + return rc; } int @@ -604,7 +625,6 @@ main(int argc, char *argv[]) /* Process each file in turn, allowing deep failure. */ for (i = optind; i < argc; i++) { char *file = argv[i]; - int const sjval = setjmp(jmpenv); int len; /* @@ -617,28 +637,16 @@ main(int argc, char *argv[]) strcmp(file + (len - ftrace_size), ftrace) == 0) continue; - switch (sjval) { - default: - fprintf(stderr, "internal error: %s\n", file); - exit(1); - break; - case SJ_SETJMP: /* normal sequence */ - /* Avoid problems if early cleanup() */ - fd_map = -1; - mmap_failed = 1; - file_map = NULL; - file_ptr = NULL; - file_updated = 0; - do_file(file); - break; - case SJ_FAIL: /* error in do_file or below */ + /* Avoid problems if early cleanup() */ + fd_map = -1; + mmap_failed = 1; + file_map = NULL; + file_ptr = NULL; + file_updated = 0; + if (do_file(file)) { fprintf(stderr, "%s: failed\n", file); ++n_error; - break; - case SJ_SUCCEED: /* premature success */ - /* do nothing */ - break; - } /* end switch */ + } } return !!n_error; } diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index c1e1b04b4871..3796eb37fb12 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -174,7 +174,7 @@ static int MIPS_is_fake_mcount(Elf_Rel const *rp) } /* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. 
*/ -static void append_func(Elf_Ehdr *const ehdr, +static int append_func(Elf_Ehdr *const ehdr, Elf_Shdr *const shstr, uint_t const *const mloc0, uint_t const *const mlocp, @@ -202,15 +202,20 @@ static void append_func(Elf_Ehdr *const ehdr, new_e_shoff = t; /* body for new shstrtab */ - ulseek(sb.st_size, SEEK_SET); - uwrite(old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size); - uwrite(mc_name, 1 + strlen(mc_name)); + if (ulseek(sb.st_size, SEEK_SET) < 0) + return -1; + if (uwrite(old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size) < 0) + return -1; + if (uwrite(mc_name, 1 + strlen(mc_name)) < 0) + return -1; /* old(modified) Elf_Shdr table, word-byte aligned */ - ulseek(t, SEEK_SET); + if (ulseek(t, SEEK_SET) < 0) + return -1; t += sizeof(Elf_Shdr) * old_shnum; - uwrite(old_shoff + (void *)ehdr, - sizeof(Elf_Shdr) * old_shnum); + if (uwrite(old_shoff + (void *)ehdr, + sizeof(Elf_Shdr) * old_shnum) < 0) + return -1; /* new sections __mcount_loc and .rel__mcount_loc */ t += 2*sizeof(mcsec); @@ -225,7 +230,8 @@ static void append_func(Elf_Ehdr *const ehdr, mcsec.sh_info = 0; mcsec.sh_addralign = _w(_size); mcsec.sh_entsize = _w(_size); - uwrite(&mcsec, sizeof(mcsec)); + if (uwrite(&mcsec, sizeof(mcsec)) < 0) + return -1; mcsec.sh_name = w(old_shstr_sh_size); mcsec.sh_type = (sizeof(Elf_Rela) == rel_entsize) @@ -239,15 +245,22 @@ static void append_func(Elf_Ehdr *const ehdr, mcsec.sh_info = w(old_shnum); mcsec.sh_addralign = _w(_size); mcsec.sh_entsize = _w(rel_entsize); - uwrite(&mcsec, sizeof(mcsec)); - uwrite(mloc0, (void *)mlocp - (void *)mloc0); - uwrite(mrel0, (void *)mrelp - (void *)mrel0); + if (uwrite(&mcsec, sizeof(mcsec)) < 0) + return -1; + + if (uwrite(mloc0, (void *)mlocp - (void *)mloc0) < 0) + return -1; + if (uwrite(mrel0, (void *)mrelp - (void *)mrel0) < 0) + return -1; ehdr->e_shoff = _w(new_e_shoff); ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */ - ulseek(0, SEEK_SET); - uwrite(ehdr, sizeof(*ehdr)); + if (ulseek(0, 
SEEK_SET) < 0) + return -1; + if (uwrite(ehdr, sizeof(*ehdr)) < 0) + return -1; + return 0; } static unsigned get_mcountsym(Elf_Sym const *const sym0, @@ -351,9 +364,9 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, * that are not going to be traced. The mcount calls here will be converted * into nops. */ -static void nop_mcount(Elf_Shdr const *const relhdr, - Elf_Ehdr const *const ehdr, - const char *const txtname) +static int nop_mcount(Elf_Shdr const *const relhdr, + Elf_Ehdr const *const ehdr, + const char *const txtname) { Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + (void *)ehdr); @@ -376,15 +389,18 @@ static void nop_mcount(Elf_Shdr const *const relhdr, mcountsym = get_mcountsym(sym0, relp, str0); if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { - if (make_nop) + if (make_nop) { ret = make_nop((void *)ehdr, _w(shdr->sh_offset) + _w(relp->r_offset)); + if (ret < 0) + return -1; + } if (warn_on_notrace_sect && !once) { printf("Section %s has mcount callers being ignored\n", txtname); once = 1; /* just warn? */ if (!make_nop) - return; + return 0; } } @@ -396,14 +412,16 @@ static void nop_mcount(Elf_Shdr const *const relhdr, Elf_Rel rel; rel = *(Elf_Rel *)relp; Elf_r_info(&rel, Elf_r_sym(relp), rel_type_nop); - ulseek((void *)relp - (void *)ehdr, SEEK_SET); - uwrite(&rel, sizeof(rel)); + if (ulseek((void *)relp - (void *)ehdr, SEEK_SET) < 0) + return -1; + if (uwrite(&rel, sizeof(rel)) < 0) + return -1; } relp = (Elf_Rel const *)(rel_entsize + (void *)relp); } + return 0; } - /* * Find a symbol in the given section, to be used as the base for relocating * the table of offsets of calls to mcount. 
A local or global symbol suffices, @@ -414,9 +432,10 @@ static void nop_mcount(Elf_Shdr const *const relhdr, * Num: Value Size Type Bind Vis Ndx Name * 2: 00000000 0 SECTION LOCAL DEFAULT 1 */ -static unsigned find_secsym_ndx(unsigned const txtndx, +static int find_secsym_ndx(unsigned const txtndx, char const *const txtname, uint_t *const recvalp, + unsigned int *sym_index, Elf_Shdr const *const symhdr, Elf_Ehdr const *const ehdr) { @@ -438,15 +457,16 @@ static unsigned find_secsym_ndx(unsigned const txtndx, continue; *recvalp = _w(symp->st_value); - return symp - sym0; + *sym_index = symp - sym0; + return 0; } } fprintf(stderr, "Cannot find symbol for section %u: %s.\n", txtndx, txtname); - fail_file(); + cleanup(); + return -1; } - /* Evade ISO C restriction: no declaration after statement in has_rel_mcount. */ static char const * __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ @@ -461,7 +481,8 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ if (strcmp("__mcount_loc", txtname) == 0) { fprintf(stderr, "warning: __mcount_loc already exists: %s\n", fname); - succeed_file(); + cleanup(); + return already_has_rel_mcount; } if (w(txthdr->sh_type) != SHT_PROGBITS || !(_w(txthdr->sh_flags) & SHF_EXECINSTR)) @@ -491,6 +512,10 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, for (; nhdr; --nhdr, ++shdrp) { txtname = has_rel_mcount(shdrp, shdr0, shstrtab, fname); + if (txtname == already_has_rel_mcount) { + totrelsz = 0; + break; + } if (txtname && is_mcounted_section_name(txtname)) totrelsz += _w(shdrp->sh_size); } @@ -499,7 +524,7 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, /* Overall supervision for Elf32 ET_REL file. 
*/ -static void +static int do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) { Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) @@ -513,26 +538,54 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) unsigned k; /* Upper bound on space: assume all relevant relocs are for mcount. */ - unsigned const totrelsz = tot_relsize(shdr0, nhdr, shstrtab, fname); - Elf_Rel *const mrel0 = umalloc(totrelsz); - Elf_Rel * mrelp = mrel0; + unsigned totrelsz; - /* 2*sizeof(address) <= sizeof(Elf_Rel) */ - uint_t *const mloc0 = umalloc(totrelsz>>1); - uint_t * mlocp = mloc0; + Elf_Rel * mrel0; + Elf_Rel * mrelp; + + uint_t * mloc0; + uint_t * mlocp; unsigned rel_entsize = 0; unsigned symsec_sh_link = 0; + int result = 0; + + totrelsz = tot_relsize(shdr0, nhdr, shstrtab, fname); + if (totrelsz == 0) + return 0; + mrel0 = umalloc(totrelsz); + mrelp = mrel0; + if (!mrel0) + return -1; + + /* 2*sizeof(address) <= sizeof(Elf_Rel) */ + mloc0 = umalloc(totrelsz>>1); + mlocp = mloc0; + if (!mloc0) { + free(mrel0); + return -1; + } + for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { char const *const txtname = has_rel_mcount(relhdr, shdr0, shstrtab, fname); + if (txtname == already_has_rel_mcount) { + result = 0; + file_updated = 0; + goto out; /* Nothing to be done; don't append! */ + } if (txtname && is_mcounted_section_name(txtname)) { + unsigned int recsym; uint_t recval = 0; - unsigned const recsym = find_secsym_ndx( - w(relhdr->sh_info), txtname, &recval, - &shdr0[symsec_sh_link = w(relhdr->sh_link)], - ehdr); + + symsec_sh_link = w(relhdr->sh_link); + result = find_secsym_ndx(w(relhdr->sh_info), txtname, + &recval, &recsym, + &shdr0[symsec_sh_link], + ehdr); + if (result) + goto out; rel_entsize = _w(relhdr->sh_entsize); mlocp = sift_rel_mcount(mlocp, @@ -543,13 +596,17 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) * This section is ignored by ftrace, but still * has mcount calls. 
Convert them to nops now. */ - nop_mcount(relhdr, ehdr, txtname); + if (nop_mcount(relhdr, ehdr, txtname) < 0) { + result = -1; + goto out; + } } } - if (mloc0 != mlocp) { - append_func(ehdr, shstr, mloc0, mlocp, mrel0, mrelp, - rel_entsize, symsec_sh_link); - } + if (!result && mloc0 != mlocp) + result = append_func(ehdr, shstr, mloc0, mlocp, mrel0, mrelp, + rel_entsize, symsec_sh_link); +out: free(mrel0); free(mloc0); + return result; } From 3aec8638246ff284e4075aac4f72ab160ebccaf2 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 31 Jul 2019 11:24:13 -0700 Subject: [PATCH 18/33] recordmcount: Kernel style function signature formatting The uwrite() and ulseek() functions are formatted inconsistently with the rest of the file and the kernel overall. While we're making other changes here let's fix this. Link: http://lkml.kernel.org/r/4c67698f734be9867a2aba7035fe0ce59e1e4423.1564596289.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 21 +++++++-------------- scripts/recordmcount.h | 13 ++++++------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index c6d395b8ff29..67f9c45b824f 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -52,8 +52,7 @@ static void *file_append; /* added to the end of the file */ static size_t file_append_size; /* how much is added to end of file */ /* Per-file resource cleanup when multiple files. */ -static void -cleanup(void) +static void cleanup(void) { if (!mmap_failed) munmap(file_map, sb.st_size); @@ -68,8 +67,7 @@ cleanup(void) /* ulseek, uwrite, ...: Check return value for errors. 
*/ -static off_t -ulseek(off_t const offset, int const whence) +static off_t ulseek(off_t const offset, int const whence) { switch (whence) { case SEEK_SET: @@ -89,8 +87,7 @@ ulseek(off_t const offset, int const whence) return file_ptr - file_map; } -static ssize_t -uwrite(void const *const buf, size_t const count) +static ssize_t uwrite(void const *const buf, size_t const count) { size_t cnt = count; off_t idx = 0; @@ -127,8 +124,7 @@ uwrite(void const *const buf, size_t const count) return count; } -static void * -umalloc(size_t size) +static void * umalloc(size_t size) { void *const addr = malloc(size); if (addr == 0) { @@ -394,8 +390,7 @@ static uint32_t (*w)(uint32_t); static uint32_t (*w2)(uint16_t); /* Names of the sections that could contain calls to mcount. */ -static int -is_mcounted_section_name(char const *const txtname) +static int is_mcounted_section_name(char const *const txtname) { return strncmp(".text", txtname, 5) == 0 || strcmp(".init.text", txtname) == 0 || @@ -448,8 +443,7 @@ static void MIPS64_r_info(Elf64_Rel *const rp, unsigned sym, unsigned type) }).r_info; } -static int -do_file(char const *const fname) +static int do_file(char const *const fname) { Elf32_Ehdr *const ehdr = mmap_file(fname); unsigned int reltype = 0; @@ -597,8 +591,7 @@ do_file(char const *const fname) return rc; } -int -main(int argc, char *argv[]) +int main(int argc, char *argv[]) { const char ftrace[] = "/ftrace.o"; int ftrace_size = sizeof(ftrace) - 1; diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 3796eb37fb12..ca9aaac89bfb 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -468,11 +468,10 @@ static int find_secsym_ndx(unsigned const txtndx, } /* Evade ISO C restriction: no declaration after statement in has_rel_mcount. 
*/ -static char const * -__has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ - Elf_Shdr const *const shdr0, - char const *const shstrtab, - char const *const fname) +static char const * __has_rel_mcount(Elf_Shdr const *const relhdr, /* reltype */ + Elf_Shdr const *const shdr0, + char const *const shstrtab, + char const *const fname) { /* .sh_info depends on .sh_type == SHT_REL[,A] */ Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)]; @@ -524,8 +523,8 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, /* Overall supervision for Elf32 ET_REL file. */ -static int -do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) +static int do_func(Elf_Ehdr *const ehdr, char const *const fname, + unsigned const reltype) { Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + (void *)ehdr); From 2e63152bc1909e10b7bc844bbbef5a815f585b45 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 31 Jul 2019 11:24:14 -0700 Subject: [PATCH 19/33] recordmcount: Kernel style formatting Fix up the whitespace irregularity in the ELF switch blocks. Swapping the initial value of gpfx allows us to simplify all but one of the one-line switch cases even further. 
Link: http://lkml.kernel.org/r/647f21f43723d3e831cedd3238c893db03eea6f0.1564596289.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 47 ++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 67f9c45b824f..273ca8b42b20 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -489,15 +489,15 @@ static int do_file(char const *const fname) push_bl_mcount_thumb = push_bl_mcount_thumb_be; break; } /* end switch */ - if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 - || w2(ehdr->e_type) != ET_REL - || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { + if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 || + w2(ehdr->e_type) != ET_REL || + ehdr->e_ident[EI_VERSION] != EV_CURRENT) { fprintf(stderr, "unrecognized ET_REL file %s\n", fname); cleanup(); goto out; } - gpfx = 0; + gpfx = '_'; switch (w2(ehdr->e_machine)) { default: fprintf(stderr, "unrecognized e_machine %u %s\n", @@ -510,32 +510,35 @@ static int do_file(char const *const fname) make_nop = make_nop_x86; ideal_nop = ideal_nop5_x86_32; mcount_adjust_32 = -1; + gpfx = 0; + break; + case EM_ARM: + reltype = R_ARM_ABS32; + altmcount = "__gnu_mcount_nc"; + make_nop = make_nop_arm; + rel_type_nop = R_ARM_NONE; + gpfx = 0; break; - case EM_ARM: reltype = R_ARM_ABS32; - altmcount = "__gnu_mcount_nc"; - make_nop = make_nop_arm; - rel_type_nop = R_ARM_NONE; - break; case EM_AARCH64: - reltype = R_AARCH64_ABS64; - make_nop = make_nop_arm64; - rel_type_nop = R_AARCH64_NONE; - ideal_nop = ideal_nop4_arm64; - gpfx = '_'; - break; - case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; - case EM_MIPS: /* reltype: e_class */ gpfx = '_'; break; - case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break; - case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break; - case EM_S390: /* reltype: e_class */ gpfx = '_'; break; - case EM_SH: reltype = R_SH_DIR32; 
break; - case EM_SPARCV9: reltype = R_SPARC_64; gpfx = '_'; break; + reltype = R_AARCH64_ABS64; + make_nop = make_nop_arm64; + rel_type_nop = R_AARCH64_NONE; + ideal_nop = ideal_nop4_arm64; + break; + case EM_IA_64: reltype = R_IA64_IMM64; break; + case EM_MIPS: /* reltype: e_class */ break; + case EM_PPC: reltype = R_PPC_ADDR32; break; + case EM_PPC64: reltype = R_PPC64_ADDR64; break; + case EM_S390: /* reltype: e_class */ break; + case EM_SH: reltype = R_SH_DIR32; gpfx = 0; break; + case EM_SPARCV9: reltype = R_SPARC_64; break; case EM_X86_64: make_nop = make_nop_x86; ideal_nop = ideal_nop5_x86_64; reltype = R_X86_64_64; rel_type_nop = R_X86_64_NONE; mcount_adjust_64 = -1; + gpfx = 0; break; } /* end switch */ From c97fea26254b284c93a67b788968622f9cc03b30 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 31 Jul 2019 11:24:15 -0700 Subject: [PATCH 20/33] recordmcount: Remove redundant cleanup() calls Redundant cleanup calls were introduced when transitioning from the old error/success handling via setjmp/longjmp -- the longjmp ensured the cleanup() call only happened once but replacing the success_file()/fail_file() calls with cleanup() meant that multiple cleanup() calls can happen as we return from function calls. In do_file(), looking just before and after the "goto out" jumps we can see that multiple cleanups() are being performed. We remove cleanup() calls from the nested functions because it makes the code easier to review -- the resources being cleaned up are generally allocated and initialized in the callers so freeing them there makes more sense. Other redundant cleanup() calls: mmap_file() is only called from do_file() and, if mmap_file() fails, then we goto out and do cleanup() there too. write_file() is only called from do_file() and do_file() calls cleanup() unconditionally after returning from write_file() therefore the cleanup() calls in write_file() are not necessary. 
find_secsym_ndx(), called from do_func()'s for-loop, when we are cleaning up here it's obvious that we break out of the loop and do another cleanup(). __has_rel_mcount() is called from two parts of do_func() and calls cleanup(). In theory we move them into do_func(), however these in turn prove redundant so another simplification step removes them as well. Link: http://lkml.kernel.org/r/de197e17fc5426623a847ea7cf3a1560a7402a4b.1564596289.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 13 ------------- scripts/recordmcount.h | 2 -- 2 files changed, 15 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 273ca8b42b20..5677fcc88a72 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -258,17 +258,14 @@ static void *mmap_file(char const *fname) fd_map = open(fname, O_RDONLY); if (fd_map < 0) { perror(fname); - cleanup(); return NULL; } if (fstat(fd_map, &sb) < 0) { perror(fname); - cleanup(); goto out; } if (!S_ISREG(sb.st_mode)) { fprintf(stderr, "not a regular file: %s\n", fname); - cleanup(); goto out; } file_map = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, @@ -314,13 +311,11 @@ static int write_file(const char *fname) fd_map = open(tmp_file, O_WRONLY | O_TRUNC | O_CREAT, sb.st_mode); if (fd_map < 0) { perror(fname); - cleanup(); return -1; } n = write(fd_map, file_map, sb.st_size); if (n != sb.st_size) { perror("write"); - cleanup(); close(fd_map); return -1; } @@ -328,7 +323,6 @@ static int write_file(const char *fname) n = write(fd_map, file_append, file_append_size); if (n != file_append_size) { perror("write"); - cleanup(); close(fd_map); return -1; } @@ -336,7 +330,6 @@ static int write_file(const char *fname) close(fd_map); if (rename(tmp_file, fname) < 0) { perror(fname); - cleanup(); return -1; } return 0; @@ -460,7 +453,6 @@ static int do_file(char const *const fname) default: fprintf(stderr, "unrecognized ELF data encoding %d: 
%s\n", ehdr->e_ident[EI_DATA], fname); - cleanup(); goto out; case ELFDATA2LSB: if (*(unsigned char const *)&endian != 1) { @@ -493,7 +485,6 @@ static int do_file(char const *const fname) w2(ehdr->e_type) != ET_REL || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { fprintf(stderr, "unrecognized ET_REL file %s\n", fname); - cleanup(); goto out; } @@ -502,7 +493,6 @@ static int do_file(char const *const fname) default: fprintf(stderr, "unrecognized e_machine %u %s\n", w2(ehdr->e_machine), fname); - cleanup(); goto out; case EM_386: reltype = R_386_32; @@ -546,14 +536,12 @@ static int do_file(char const *const fname) default: fprintf(stderr, "unrecognized ELF class %d %s\n", ehdr->e_ident[EI_CLASS], fname); - cleanup(); goto out; case ELFCLASS32: if (w2(ehdr->e_ehsize) != sizeof(Elf32_Ehdr) || w2(ehdr->e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); - cleanup(); goto out; } if (w2(ehdr->e_machine) == EM_MIPS) { @@ -569,7 +557,6 @@ static int do_file(char const *const fname) || w2(ghdr->e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); - cleanup(); goto out; } if (w2(ghdr->e_machine) == EM_S390) { diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index ca9aaac89bfb..8f0a278ce0af 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -463,7 +463,6 @@ static int find_secsym_ndx(unsigned const txtndx, } fprintf(stderr, "Cannot find symbol for section %u: %s.\n", txtndx, txtname); - cleanup(); return -1; } @@ -480,7 +479,6 @@ static char const * __has_rel_mcount(Elf_Shdr const *const relhdr, /* reltype */ if (strcmp("__mcount_loc", txtname) == 0) { fprintf(stderr, "warning: __mcount_loc already exists: %s\n", fname); - cleanup(); return already_has_rel_mcount; } if (w(txthdr->sh_type) != SHT_PROGBITS || From 4fbcf0741697a75eb766835cb34836cf4f3803da Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Wed, 31 Jul 2019 11:24:16 -0700 Subject: [PATCH 21/33] 
recordmcount: Clarify what cleanup() does cleanup() mostly frees/unmaps the malloc'd/privately-mapped copy of the ELF file recordmcount is working on, which is set up in mmap_file(). It also deals with positioning within the pseudo private-mapping of the file and appending to the ELF file. Split into two steps: mmap_cleanup() for the mapping itself file_append_cleanup() for allocations storing the appended ELF data. Also, move the global variable initializations out of the main, per-object-file loop and nearer to the alloc/init (mmap_file()) and two cleanup functions so we can more clearly see how they're related. Link: http://lkml.kernel.org/r/2a387ac86d133d22c68f57b9933c32bab1d09a2d.1564596289.git.mhelsley@vmware.com Signed-off-by: Matt Helsley Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 151 ++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 70 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 5677fcc88a72..612268eabef4 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -48,21 +48,26 @@ static void *file_map; /* pointer of the mapped file */ static void *file_end; /* pointer to the end of the mapped file */ static int file_updated; /* flag to state file was changed */ static void *file_ptr; /* current file pointer location */ + static void *file_append; /* added to the end of the file */ static size_t file_append_size; /* how much is added to end of file */ /* Per-file resource cleanup when multiple files. */ -static void cleanup(void) +static void file_append_cleanup(void) +{ + free(file_append); + file_append = NULL; + file_append_size = 0; + file_updated = 0; +} + +static void mmap_cleanup(void) { if (!mmap_failed) munmap(file_map, sb.st_size); else free(file_map); file_map = NULL; - free(file_append); - file_append = NULL; - file_append_size = 0; - file_updated = 0; } /* ulseek, uwrite, ...: Check return value for errors. 
*/ @@ -103,7 +108,8 @@ static ssize_t uwrite(void const *const buf, size_t const count) } if (!file_append) { perror("write"); - cleanup(); + file_append_cleanup(); + mmap_cleanup(); return -1; } if (file_ptr < file_end) { @@ -129,12 +135,76 @@ static void * umalloc(size_t size) void *const addr = malloc(size); if (addr == 0) { fprintf(stderr, "malloc failed: %zu bytes\n", size); - cleanup(); + file_append_cleanup(); + mmap_cleanup(); return NULL; } return addr; } +/* + * Get the whole file as a programming convenience in order to avoid + * malloc+lseek+read+free of many pieces. If successful, then mmap + * avoids copying unused pieces; else just read the whole file. + * Open for both read and write; new info will be appended to the file. + * Use MAP_PRIVATE so that a few changes to the in-memory ElfXX_Ehdr + * do not propagate to the file until an explicit overwrite at the last. + * This preserves most aspects of consistency (all except .st_size) + * for simultaneous readers of the file while we are appending to it. + * However, multiple writers still are bad. We choose not to use + * locking because it is expensive and the use case of kernel build + * makes multiple writers unlikely. 
+ */ +static void *mmap_file(char const *fname) +{ + /* Avoid problems if early cleanup() */ + fd_map = -1; + mmap_failed = 1; + file_map = NULL; + file_ptr = NULL; + file_updated = 0; + sb.st_size = 0; + + fd_map = open(fname, O_RDONLY); + if (fd_map < 0) { + perror(fname); + return NULL; + } + if (fstat(fd_map, &sb) < 0) { + perror(fname); + goto out; + } + if (!S_ISREG(sb.st_mode)) { + fprintf(stderr, "not a regular file: %s\n", fname); + goto out; + } + file_map = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, + fd_map, 0); + if (file_map == MAP_FAILED) { + mmap_failed = 1; + file_map = umalloc(sb.st_size); + if (!file_map) { + perror(fname); + goto out; + } + if (read(fd_map, file_map, sb.st_size) != sb.st_size) { + perror(fname); + free(file_map); + file_map = NULL; + goto out; + } + } else + mmap_failed = 0; +out: + close(fd_map); + fd_map = -1; + + file_end = file_map + sb.st_size; + + return file_map; +} + + static unsigned char ideal_nop5_x86_64[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; static unsigned char ideal_nop5_x86_32[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; static unsigned char *ideal_nop; @@ -238,61 +308,6 @@ static int make_nop_arm64(void *map, size_t const offset) return 0; } -/* - * Get the whole file as a programming convenience in order to avoid - * malloc+lseek+read+free of many pieces. If successful, then mmap - * avoids copying unused pieces; else just read the whole file. - * Open for both read and write; new info will be appended to the file. - * Use MAP_PRIVATE so that a few changes to the in-memory ElfXX_Ehdr - * do not propagate to the file until an explicit overwrite at the last. - * This preserves most aspects of consistency (all except .st_size) - * for simultaneous readers of the file while we are appending to it. - * However, multiple writers still are bad. We choose not to use - * locking because it is expensive and the use case of kernel build - * makes multiple writers unlikely. 
- */ -static void *mmap_file(char const *fname) -{ - file_map = NULL; - sb.st_size = 0; - fd_map = open(fname, O_RDONLY); - if (fd_map < 0) { - perror(fname); - return NULL; - } - if (fstat(fd_map, &sb) < 0) { - perror(fname); - goto out; - } - if (!S_ISREG(sb.st_mode)) { - fprintf(stderr, "not a regular file: %s\n", fname); - goto out; - } - file_map = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, - fd_map, 0); - mmap_failed = 0; - if (file_map == MAP_FAILED) { - mmap_failed = 1; - file_map = umalloc(sb.st_size); - if (!file_map) { - perror(fname); - goto out; - } - if (read(fd_map, file_map, sb.st_size) != sb.st_size) { - perror(fname); - free(file_map); - file_map = NULL; - goto out; - } - } -out: - close(fd_map); - - file_end = file_map + sb.st_size; - - return file_map; -} - static int write_file(const char *fname) { char tmp_file[strlen(fname) + 4]; @@ -438,10 +453,11 @@ static void MIPS64_r_info(Elf64_Rel *const rp, unsigned sym, unsigned type) static int do_file(char const *const fname) { - Elf32_Ehdr *const ehdr = mmap_file(fname); unsigned int reltype = 0; + Elf32_Ehdr *ehdr; int rc = -1; + ehdr = mmap_file(fname); if (!ehdr) goto out; @@ -577,7 +593,8 @@ static int do_file(char const *const fname) rc = write_file(fname); out: - cleanup(); + file_append_cleanup(); + mmap_cleanup(); return rc; } @@ -620,12 +637,6 @@ int main(int argc, char *argv[]) strcmp(file + (len - ftrace_size), ftrace) == 0) continue; - /* Avoid problems if early cleanup() */ - fd_map = -1; - mmap_failed = 1; - file_map = NULL; - file_ptr = NULL; - file_updated = 0; if (do_file(file)) { fprintf(stderr, "%s: failed\n", file); ++n_error; From f7edb451fa51e44e62177347ea7850aa0e901ea5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 7 Aug 2019 11:28:59 -0400 Subject: [PATCH 22/33] tracing/arm64: Have max stack tracer handle the case of return address after data Most archs (well at least x86) store the function call return address on the stack before storing the 
local variables for the function. The max stack tracer depends on this in its algorithm to display the stack size of each function it finds in the back trace. Some archs (arm64) may store the return address (from its link register) just before calling a nested function. There's no reason to save the link register on leaf functions, as it won't be updated. This breaks the algorithm of the max stack tracer. Add a new define ARCH_FTRACE_SHIFT_STACK_TRACER that an architecture may set if it stores the return address (link register) after it stores the function's local variables, and have the stack tracer shift the values of the mapped stack size to the appropriate functions. Link: 20190802094103.163576-1-jiping.ma2@windriver.com Reported-by: Jiping Ma Acked-by: Will Deacon Signed-off-by: Steven Rostedt (VMware) --- arch/arm64/include/asm/ftrace.h | 13 +++++++++++++ kernel/trace/trace_stack.c | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 5ab5200b2bdc..d48667b04c41 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -14,6 +14,19 @@ #define MCOUNT_ADDR ((unsigned long)_mcount) #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE +/* + * Currently, gcc tends to save the link register after the local variables + * on the stack. This causes the max stack tracer to report the function + * frame sizes for the wrong functions. By defining + * ARCH_FTRACE_SHIFT_STACK_TRACER, it will tell the stack tracer to expect + * to find the return address on the stack after the local variables have + * been set up. + * + * Note, this may change in the future, and we will need to deal with that + * if it were to happen. 
+ */ +#define ARCH_FTRACE_SHIFT_STACK_TRACER 1 + #ifndef __ASSEMBLY__ #include diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5d16f73898db..642a850af81a 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -158,6 +158,20 @@ static void check_stack(unsigned long ip, unsigned long *stack) i++; } +#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER + /* + * Some archs will store the link register before calling + * nested functions. This means the saved return address + * comes after the local storage, and we need to shift + * for that. + */ + if (x > 1) { + memmove(&stack_trace_index[0], &stack_trace_index[1], + sizeof(stack_trace_index[0]) * (x - 1)); + x--; + } +#endif + stack_trace_nr_entries = x; if (task_stack_end_corrupted(current)) { From 58fe7a87db51ea00596187765dabfc2c4ea2b436 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 7 Aug 2019 12:27:30 -0400 Subject: [PATCH 23/33] tracing: Document the stack trace algorithm in the comments As the max stack tracer algorithm is not that easy to understand from the code, add comments that explain the algorithm and mentions how ARCH_FTRACE_SHIFT_STACK_TRACER affects it. Link: http://lkml.kernel.org/r/20190806123455.487ac02b@gandalf.local.home Suggested-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 98 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 642a850af81a..ec9a34a97129 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -53,6 +53,104 @@ static void print_max_stack(void) } } +/* + * The stack tracer looks for a maximum stack at each call from a function. It + * registers a callback from ftrace, and in that callback it examines the stack + * size. It determines the stack size from the variable passed in, which is the + * address of a local variable in the stack_trace_call() callback function. 
+ * The stack size is calculated by the address of the local variable to the top + * of the current stack. If that size is smaller than the currently saved max + * stack size, nothing more is done. + * + * If the size of the stack is greater than the maximum recorded size, then the + * following algorithm takes place. + * + * For architectures (like x86) that store the function's return address before + * saving the function's local variables, the stack will look something like + * this: + * + * [ top of stack ] + * 0: sys call entry frame + * 10: return addr to entry code + * 11: start of sys_foo frame + * 20: return addr to sys_foo + * 21: start of kernel_func_bar frame + * 30: return addr to kernel_func_bar + * 31: [ do trace stack here ] + * + * The save_stack_trace() is called returning all the functions it finds in the + * current stack. Which would be (from the bottom of the stack to the top): + * + * return addr to kernel_func_bar + * return addr to sys_foo + * return addr to entry code + * + * Now to figure out how much each of these functions' local variable size is, + * a search of the stack is made to find these values. When a match is made, it + * is added to the stack_dump_trace[] array. The offset into the stack is saved + * in the stack_trace_index[] array. The above example would show: + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 30 + * return addr to sys_foo | 20 + * return addr to entry | 10 + * + * The print_max_stack() function above, uses these values to print the size of + * each function's portion of the stack. + * + * for (i = 0; i < nr_entries; i++) { + * size = i == nr_entries - 1 ? 
stack_trace_index[i] : + * stack_trace_index[i] - stack_trace_index[i+1] + * print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]); + * } + * + * The above shows + * + * depth size location + * ----- ---- -------- + * 0 30 10 kernel_func_bar + * 1 20 10 sys_foo + * 2 10 10 entry code + * + * Now for architectures that might save the return address after the function's + * local variables (saving the link register before calling nested functions), + * this will cause the stack to look a little different: + * + * [ top of stack ] + * 0: sys call entry frame + * 10: start of sys_foo_frame + * 19: return addr to entry code << lr saved before calling kernel_func_bar + * 20: start of kernel_func_bar frame + * 29: return addr to sys_foo_frame << lr saved before calling next function + * 30: [ do trace stack here ] + * + * Although the functions returned by save_stack_trace() may be the same, the + * placement in the stack will be different. Using the same algorithm as above + * would yield: + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 30 + * return addr to sys_foo | 29 + * return addr to entry | 19 + * + * Where the mapping is off by one: + * + * kernel_func_bar stack frame size is 29 - 19 not 30 - 29! + * + * To fix this, if the architecture sets ARCH_FTRACE_SHIFT_STACK_TRACER the + * values in stack_trace_index[] are shifted by one and the number of + * stack trace entries is decremented by one. + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 29 + * return addr to sys_foo | 19 + * + * Although the entry function is not displayed, the first function (sys_foo) + * will still include the stack size of it. 
+ */ static void check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; From a47b53e95accfd2814efe39dfca06dbd45cd857a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 13 Aug 2019 12:14:35 -0400 Subject: [PATCH 24/33] tracing: Rename tracing_reset() to tracing_reset_cpu() The name tracing_reset() was a misnomer, as it really only reset a single CPU buffer. Rename it to tracing_reset_cpu() and also make it static and remove the prototype from trace.h, as it is only used in a single function. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +++--- kernel/trace/trace.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3916b72de715..e917aa783675 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1854,7 +1854,7 @@ int __init register_tracer(struct tracer *type) return ret; } -void tracing_reset(struct trace_buffer *buf, int cpu) +static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) { struct ring_buffer *buffer = buf->buffer; @@ -4251,7 +4251,7 @@ static int tracing_open(struct inode *inode, struct file *file) if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(trace_buf); else - tracing_reset(trace_buf, cpu); + tracing_reset_cpu(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { @@ -6742,7 +6742,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, if (iter->cpu_file == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->max_buffer); else - tracing_reset(&tr->max_buffer, iter->cpu_file); + tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); } break; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 005f08629b8b..26b0a08f3c7d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -677,7 +677,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int 
tracing_is_enabled(void); -void tracing_reset(struct trace_buffer *buf, int cpu); void tracing_reset_online_cpus(struct trace_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); From ac68154626ab7fe4ce5f424937c34f42a3e20c5b Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Fri, 12 Jul 2019 09:53:08 +0800 Subject: [PATCH 25/33] tracing: Add "gfp_t" support in synthetic_events Add "gfp_t" support in synthetic_events, then the "gfp_t" type parameter in some functions can be traced. Prints the gfp flags as hex in addition to the human-readable flag string. Example output: whoopsie-630 [000] ...1 78.969452: testevent: bar=b20 (GFP_ATOMIC|__GFP_ZERO) rcuc/0-11 [000] ...1 81.097555: testevent: bar=a20 (GFP_ATOMIC) rcuc/0-11 [000] ...1 81.583123: testevent: bar=a20 (GFP_ATOMIC) Link: http://lkml.kernel.org/r/20190712015308.9908-1-zhengjun.xing@linux.intel.com Signed-off-by: Zhengjun Xing [ Added printing of flag names ] Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 65e7d071ed28..3a6e42aa08e6 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -13,6 +13,10 @@ #include #include +/* for gfp flag names */ +#include +#include + #include "tracing_map.h" #include "trace.h" #include "trace_dynevent.h" @@ -752,6 +756,8 @@ static int synth_field_size(char *type) size = sizeof(unsigned long); else if (strcmp(type, "pid_t") == 0) size = sizeof(pid_t); + else if (strcmp(type, "gfp_t") == 0) + size = sizeof(gfp_t); else if (synth_field_is_string(type)) size = synth_field_string_size(type); @@ -792,6 +798,8 @@ static const char *synth_field_fmt(char *type) fmt = "%lu"; else if (strcmp(type, "pid_t") == 0) fmt = "%d"; + else if (strcmp(type, "gfp_t") == 0) + fmt = "%x"; else if (synth_field_is_string(type)) fmt = 
"%s"; @@ -834,9 +842,20 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, i == se->n_fields - 1 ? "" : " "); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { + struct trace_print_flags __flags[] = { + __def_gfpflag_names, {-1, NULL} }; + trace_seq_printf(s, print_fmt, se->fields[i]->name, entry->fields[n_u64], i == se->n_fields - 1 ? "" : " "); + + if (strcmp(se->fields[i]->type, "gfp_t") == 0) { + trace_seq_puts(s, " ("); + trace_print_flags_seq(s, "|", + entry->fields[n_u64], + __flags); + trace_seq_putc(s, ')'); + } n_u64++; } } From 08468754c16e731d31538a8b1b0b433be2410a89 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 10 Sep 2019 22:33:36 +0800 Subject: [PATCH 26/33] ftrace: Simplify ftrace hash lookup code in clear_func_from_hash() Function ftrace_lookup_ip() will check empty hash table. So we don't need extra check outside. Link: http://lkml.kernel.org/r/20190910143336.13472-1-changbin.du@gmail.com Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f9821a3374e9..c4cc048eb594 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6036,11 +6036,7 @@ clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) { struct ftrace_func_entry *entry; - if (ftrace_hash_empty(hash)) - return; - - entry = __ftrace_lookup_ip(hash, func->ip); - + entry = ftrace_lookup_ip(hash, func->ip); /* * Do not allow this rec to match again. * Yeah, it may waste some memory, but will be removed From 119cdbdb95a66203c0bca09474427c297186f7a3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Aug 2019 18:15:43 +0300 Subject: [PATCH 27/33] tracing: Be more clever when dumping hex in __print_hex() Hex dump as many as 16 bytes at once in trace_print_hex_seq() instead of byte-by-byte approach. 
Link: http://lkml.kernel.org/r/20190806151543.86061-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cab4a5398f1d..d54ce252b05a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -219,10 +219,10 @@ trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len, { int i; const char *ret = trace_seq_buffer_ptr(p); + const char *fmt = concatenate ? "%*phN" : "%*ph"; - for (i = 0; i < buf_len; i++) - trace_seq_printf(p, "%s%2.2x", concatenate || i == 0 ? "" : " ", - buf[i]); + for (i = 0; i < buf_len; i += 16) + trace_seq_printf(p, fmt, min(buf_len - i, 16), &buf[i]); trace_seq_putc(p, 0); return ret; From 17f8607a1658a8e70415eef67909f990d13017b5 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 1 Sep 2019 17:02:01 -0500 Subject: [PATCH 28/33] tracing: Make sure variable reference alias has correct var_ref_idx Original changelog from Steve Rostedt (except last sentence which explains the problem, and the Fixes: tag): I performed a three way histogram with the following commands: echo 'irq_lat u64 lat pid_t pid' > synthetic_events echo 'wake_lat u64 lat u64 irqlat pid_t pid' >> synthetic_events echo 'hist:keys=common_pid:irqts=common_timestamp.usecs if function == 0xffffffff81200580' > events/timer/hrtimer_start/trigger echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$irqts:onmatch(timer.hrtimer_start).irq_lat($lat,pid) if common_flags & 1' > events/sched/sched_waking/trigger echo 'hist:keys=pid:wakets=common_timestamp.usecs,irqlat=lat' > events/synthetic/irq_lat/trigger echo 'hist:keys=next_pid:lat=common_timestamp.usecs-$wakets,irqlat=$irqlat:onmatch(synthetic.irq_lat).wake_lat($lat,$irqlat,next_pid)' > events/sched/sched_switch/trigger echo 1 > events/synthetic/wake_lat/enable Basically I 
wanted to see: hrtimer_start (calling function tick_sched_timer) Note: # grep tick_sched_timer /proc/kallsyms ffffffff81200580 t tick_sched_timer And save the time of that, and then record sched_waking if it is called in interrupt context and with the same pid as the hrtimer_start, it will record the latency between that and the waking event. I then look at when the task that is woken is scheduled in, and record the latency between the wakeup and the task running. At the end, the wake_lat synthetic event will show the wakeup to scheduled latency, as well as the irq latency from hrtimer_start to the wakeup. The problem is that I found this: -0 [007] d... 190.485261: wake_lat: lat=27 irqlat=190485230 pid=698 -0 [005] d... 190.485283: wake_lat: lat=40 irqlat=190485239 pid=10 -0 [002] d... 190.488327: wake_lat: lat=56 irqlat=190488266 pid=335 -0 [005] d... 190.489330: wake_lat: lat=64 irqlat=190489262 pid=10 -0 [003] d... 190.490312: wake_lat: lat=43 irqlat=190490265 pid=77 -0 [005] d... 190.493322: wake_lat: lat=54 irqlat=190493262 pid=10 -0 [005] d... 190.497305: wake_lat: lat=35 irqlat=190497267 pid=10 -0 [005] d... 190.501319: wake_lat: lat=50 irqlat=190501264 pid=10 The irqlat seemed quite large! Investigating this further, if I had enabled the irq_lat synthetic event, I noticed this: -0 [002] d.s. 249.429308: irq_lat: lat=164968 pid=335 -0 [002] d... 249.429369: wake_lat: lat=55 irqlat=249429308 pid=335 Notice that the timestamp of the irq_lat "249.429308" is awfully similar to the reported irqlat variable. In fact, all instances were like this. It appeared that: irqlat=$irqlat Wasn't assigning the old $irqlat to the new irqlat variable, but instead was assigning the $irqts to it. The issue is that assigning the old $irqlat to the new irqlat variable creates a variable reference alias, but the alias creation code forgets to make sure the alias uses the same var_ref_idx to access the reference.
Link: http://lkml.kernel.org/r/1567375321.5282.12.camel@kernel.org Cc: Linux Trace Devel Cc: linux-rt-users Cc: stable@vger.kernel.org Fixes: 7e8b88a30b085 ("tracing: Add hist trigger support for variable reference aliases") Reported-by: Steven Rostedt (VMware) Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 3a6e42aa08e6..9468bd8d44a2 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2804,6 +2804,8 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data, return NULL; } + alias->var_ref_idx = var_ref->var_ref_idx; + return alias; } From d59fae6fea39efe65bb3d3310aaa2a54b5f55c0d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 17 Sep 2019 14:11:37 +0900 Subject: [PATCH 29/33] tracing/kprobe: Fix NULL pointer access in trace_probe_unlink() Fix NULL pointer access in trace_probe_unlink() by initializing trace_probe.list correctly in trace_probe_init(). In the error case of trace_probe_init(), it can call trace_probe_unlink() before initializing trace_probe.list member. This causes NULL pointer dereference at list_del_init() in trace_probe_unlink().
Syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8633 Comm: syz-executor797 Not tainted 5.3.0-rc8-next-20190915 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__list_del_entry_valid+0x85/0xf5 lib/list_debug.c:51 Code: 0f 84 e1 00 00 00 48 b8 22 01 00 00 00 00 ad de 49 39 c4 0f 84 e2 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 75 53 49 8b 14 24 4c 39 f2 0f 85 99 00 00 00 49 8d 7d RSP: 0018:ffff888090a7f9d8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88809b6f90c0 RCX: ffffffff817c0ca9 RDX: 0000000000000000 RSI: ffffffff817c0a73 RDI: ffff88809b6f90c8 RBP: ffff888090a7f9f0 R08: ffff88809a04e600 R09: ffffed1015d26aed R10: ffffed1015d26aec R11: ffff8880ae935763 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88809b6f90c0 R15: ffff88809b6f90d0 FS: 0000555556f99880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000006cc090 CR3: 00000000962b2000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_del_entry include/linux/list.h:131 [inline] list_del_init include/linux/list.h:190 [inline] trace_probe_unlink+0x1f/0x200 kernel/trace/trace_probe.c:959 trace_probe_cleanup+0xd3/0x110 kernel/trace/trace_probe.c:973 trace_probe_init+0x3f2/0x510 kernel/trace/trace_probe.c:1011 alloc_trace_uprobe+0x5e/0x250 kernel/trace/trace_uprobe.c:353 create_local_trace_uprobe+0x109/0x4a0 kernel/trace/trace_uprobe.c:1508 perf_uprobe_init+0x131/0x210 kernel/trace/trace_event_perf.c:314 perf_uprobe_event_init+0x106/0x1a0 kernel/events/core.c:8898 perf_try_init_event+0x135/0x590 kernel/events/core.c:10184 perf_init_event kernel/events/core.c:10228 [inline] 
perf_event_alloc.part.0+0x1b89/0x33d0 kernel/events/core.c:10505 perf_event_alloc kernel/events/core.c:10887 [inline] __do_sys_perf_event_open+0xa2d/0x2d00 kernel/events/core.c:10989 __se_sys_perf_event_open kernel/events/core.c:10871 [inline] __x64_sys_perf_event_open+0xbe/0x150 kernel/events/core.c:10871 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Link: http://lkml.kernel.org/r/156869709721.22406.5153754822203046939.stgit@devnote2 Reported-by: syzbot+2f807f4d3a2a4e87f18f@syzkaller.appspotmail.com Fixes: ca89bc071d5e ("tracing/kprobe: Add multi-probe per event support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 1e67fef06e53..baf58a3612c0 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -986,6 +986,12 @@ int trace_probe_init(struct trace_probe *tp, const char *event, if (!tp->event) return -ENOMEM; + INIT_LIST_HEAD(&tp->event->files); + INIT_LIST_HEAD(&tp->event->class.fields); + INIT_LIST_HEAD(&tp->event->probes); + INIT_LIST_HEAD(&tp->list); + list_add(&tp->event->probes, &tp->list); + call = trace_probe_event_call(tp); call->class = &tp->event->class; call->name = kstrdup(event, GFP_KERNEL); @@ -999,11 +1005,6 @@ int trace_probe_init(struct trace_probe *tp, const char *event, ret = -ENOMEM; goto error; } - INIT_LIST_HEAD(&tp->event->files); - INIT_LIST_HEAD(&tp->event->class.fields); - INIT_LIST_HEAD(&tp->event->probes); - INIT_LIST_HEAD(&tp->list); - list_add(&tp->event->probes, &tp->list); return 0; From 604e3548236de26ed6659b85bda8f27b28a7601b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Mar 2019 15:09:23 -0400 Subject: [PATCH 30/33] selftests/ftrace: Select an existing function in kprobe_eventname test Running the ftrace selftests on the latest kernel caused 
the kprobe_eventname test to fail. It was due to the test that searches for a function with a "dot" in the name and adds a probe to it. Unfortunately, for this test, it picked: optimize_nops.isra.2.cold.4 Which happens to be marked as "__init", which means it no longer exists in the kernel! (kallsyms keeps those function names around for tracing purposes) As only functions that still exist are in the available_filter_functions file, as they are removed when the functions are freed at boot or module exit, have the test search for a function with ".isra." in the name as well as being in the available_filter_functions (if the file exists). Link: http://lkml.kernel.org/r/20190322150923.1b58eca5@gandalf.local.home Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/kprobe/kprobe_eventname.tc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc index 3fb70e01b1fe..3ff236719b6e 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc @@ -24,7 +24,21 @@ test -d events/kprobes2/event2 || exit_failure :;: "Add an event on dot function without name" ;: -FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "` +find_dot_func() { + if [ !
-f available_filter_functions ]; then + grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " " + return; + fi + + grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do + if grep -s $f available_filter_functions; then + echo $f + break + fi + done +} + +FUNC=`find_dot_func | tail -n 1` [ "x" != "x$FUNC" ] || exit_unresolved echo "p $FUNC" > kprobe_events EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:` From 44d00dc7ceab1732ebd5f5aae601b24dacdf10c5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 18 Sep 2019 17:55:37 +0900 Subject: [PATCH 31/33] tracing/probe: Fix to allow user to enable events on unloaded modules Fix to allow user to enable probe events on unloaded modules. This operation was allowed before commit 60d53e2c3b75 ("tracing/probe: Split trace_event related data from trace_probe"), because if users need to probe module init functions, they have to enable those probe events before loading the module. Link: http://lkml.kernel.org/r/156879693733.31056.9331322616994665167.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 60d53e2c3b75 ("tracing/probe: Split trace_event related data from trace_probe") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7579c53bb053..0ba3239c0270 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -371,31 +371,24 @@ static int enable_trace_kprobe(struct trace_event_call *call, if (enabled) return 0; - enabled = false; list_for_each_entry(pos, trace_probe_probe_list(tp), list) { tk = container_of(pos, struct trace_kprobe, tp); if (trace_kprobe_has_gone(tk)) continue; ret = __enable_trace_kprobe(tk); - if (ret) { - if (enabled) { - __disable_trace_kprobe(tp); - enabled = false; - } + if (ret) break; - } enabled = true; } - if (!enabled) {
- /* No probe is enabled. Roll back */ + if (ret) { + /* Failed to enable one of them. Roll back all */ + if (enabled) + __disable_trace_kprobe(tp); if (file) trace_probe_remove_file(tp, file); else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - if (!ret) - /* Since all probes are gone, this is not available */ - ret = -EADDRNOTAVAIL; } return ret; From fe60b0ce8e7335269722ec080173a9411a9d58a5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 18 Sep 2019 17:55:46 +0900 Subject: [PATCH 32/33] tracing/probe: Reject exactly same probe event Reject exactly same probe events as existing probes. Multiprobe allows user to define multiple probes on same event. If user appends a probe which has exactly the same definition (same probe address and same arguments) on existing event, the event will record same probe information twice. That can confuse users, so reject it. Link: http://lkml.kernel.org/r/156879694602.31056.5533024778165036763.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 56 +++++++++++++++++++++++++++++++------ kernel/trace/trace_probe.h | 3 +- kernel/trace/trace_uprobe.c | 56 +++++++++++++++++++++++++++++++------ 3 files changed, 98 insertions(+), 17 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0ba3239c0270..a6697e28ddda 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -528,10 +528,57 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk) return 0; } +static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, + struct trace_kprobe *comp) +{ + struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; + int i; + + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_kprobe, tp); + if (strcmp(trace_kprobe_symbol(orig), + trace_kprobe_symbol(comp)) || + trace_kprobe_offset(orig) != trace_kprobe_offset(comp)) + continue; + + /* + *
trace_probe_compare_arg_type() checked names and types. The + * probe is the same only if nr_args and every comm match too. + */ + if (orig->tp.nr_args != comp->tp.nr_args) + continue; + + for (i = 0; i < orig->tp.nr_args; i++) { + if (strcmp(orig->tp.args[i].comm, + comp->tp.args[i].comm)) + break; + } + + if (i == orig->tp.nr_args) + return true; + } + + return false; +} + static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to) { int ret; + ret = trace_probe_compare_arg_type(&tk->tp, &to->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + return -EEXIST; + } + if (trace_kprobe_has_same_kprobe(to, tk)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, SAME_PROBE); + return -EEXIST; + } + /* Append to existing event */ ret = trace_probe_append(&tk->tp, &to->tp); if (ret) @@ -568,14 +615,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk) trace_probe_log_err(0, DIFF_PROBE_TYPE); ret = -EEXIST; } else { - ret = trace_probe_compare_arg_type(&tk->tp, &old_tk->tp); - if (ret) { - /* Note that argument starts index = 2 */ - trace_probe_log_set_index(ret + 1); - trace_probe_log_err(0, DIFF_ARG_TYPE); - ret = -EEXIST; - } else - ret = append_trace_kprobe(tk, old_tk); + ret = append_trace_kprobe(tk, old_tk); } goto end; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index f805cc4cbe7c..4ee703728aec 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -436,7 +436,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ C(FAIL_REG_PROBE, "Failed to register probe event"),\ C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ - C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"), + C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\ + C(SAME_PROBE, "There is already the exact same probe event"), #undef C #define C(a, b) TP_ERR_##a
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index cbf4da4bf367..34dd6d0016a3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -410,10 +410,57 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) return 0; } +static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, + struct trace_uprobe *comp) +{ + struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; + struct inode *comp_inode = d_real_inode(comp->path.dentry); + int i; + + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_uprobe, tp); + if (comp_inode != d_real_inode(orig->path.dentry) || + comp->offset != orig->offset) + continue; + + /* + * trace_probe_compare_arg_type() checked names and types. The + * probe is the same only if nr_args and every comm match too. + */ + if (orig->tp.nr_args != comp->tp.nr_args) + continue; + + for (i = 0; i < orig->tp.nr_args; i++) { + if (strcmp(orig->tp.args[i].comm, + comp->tp.args[i].comm)) + break; + } + + if (i == orig->tp.nr_args) + return true; + } + + return false; +} + static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) { int ret; + ret = trace_probe_compare_arg_type(&tu->tp, &to->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + return -EEXIST; + } + if (trace_uprobe_has_same_uprobe(to, tu)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, SAME_PROBE); + return -EEXIST; + } + /* Append to existing event */ ret = trace_probe_append(&tu->tp, &to->tp); if (!ret) @@ -469,14 +516,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu) trace_probe_log_err(0, DIFF_PROBE_TYPE); ret = -EEXIST; } else { - ret = trace_probe_compare_arg_type(&tu->tp, &old_tu->tp); - if (ret) { - /* Note that argument starts index = 2 */ - trace_probe_log_set_index(ret + 1); - trace_probe_log_err(0, DIFF_ARG_TYPE); - ret = -EEXIST; - } else - ret = append_trace_uprobe(tu, old_tu); + ret = append_trace_uprobe(tu,
old_tu); } goto end; } From b78b94b82122208902c0f83805e614e1239f9893 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 18 Sep 2019 17:55:55 +0900 Subject: [PATCH 33/33] selftests/ftrace: Update kprobe event error testcase Update kprobe event error testcase to test if it correctly finds the exact same probe event. Link: http://lkml.kernel.org/r/156879695513.31056.1580235733738840126.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 39ef7ac1f51c..8a4025e912cb 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -95,6 +95,7 @@ echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE +check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE fi exit 0