From a61ea6379ae9dbb63fbf022d1456733520db6be7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:22 +0900 Subject: [PATCH 01/12] tools/bootconfig: Fix errno reference after printf() Fix not to refer the errno variable as the result of previous libc functions after printf() because printf() can change the errno. Link: https://lkml.kernel.org/r/160576520243.320071.51093664672431249.stgit@devnote2 Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 52 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index eb92027817a7..52eb2bbe8966 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -147,6 +147,12 @@ static int load_xbc_file(const char *path, char **buf) return ret; } +static int pr_errno(const char *msg, int err) +{ + pr_err("%s: %d\n", msg, err); + return err; +} + static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; @@ -162,26 +168,24 @@ static int load_xbc_from_initrd(int fd, char **buf) if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) return 0; - if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) + return pr_errno("Failed to lseek for magic", -errno); + if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) - return -errno; + return pr_errno("Failed to read", -errno); + /* Check the bootconfig magic bytes */ if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) + return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(u32)) < 0) - return -errno; + return pr_errno("Failed to read size", -errno); if (read(fd, &csum, sizeof(u32)) < 0) - return -errno; + return pr_errno("Failed to read checksum", -errno); /* Wrong size error */ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { @@ -190,10 +194,8 @@ static int load_xbc_from_initrd(int fd, char **buf) } if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), - SEEK_SET) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + SEEK_SET) < 0) + return pr_errno("Failed to lseek", -errno); ret = load_xbc_fd(fd, buf, size); if (ret < 0) @@ -262,14 +264,16 @@ static int show_xbc(const char *path, bool list) ret = stat(path, &st); if (ret < 0) { - pr_err("Failed to stat %s: %d\n", path, -errno); - return -errno; + ret = -errno; + pr_err("Failed to stat %s: %d\n", path, ret); + return ret; } fd = open(path, O_RDONLY); if (fd < 0) { - pr_err("Failed to open initrd %s: %d\n", path, fd); - return -errno; + ret = -errno; + pr_err("Failed to open initrd %s: %d\n", path, ret); + return ret; } ret = load_xbc_from_initrd(fd, &buf); @@ -307,8 +311,9 @@ static int delete_xbc(const char *path) fd = open(path, O_RDWR); if (fd < 0) { - pr_err("Failed to open initrd %s: %d\n", path, fd); - return -errno; + ret = -errno; + pr_err("Failed to open initrd %s: %d\n", path, ret); + return ret; } size = load_xbc_from_initrd(fd, &buf); @@ -383,9 +388,10 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Apply new one */ fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { - pr_err("Failed to open %s: %d\n", path, fd); + ret = -errno; + pr_err("Failed to open %s: %d\n", path, ret); free(data); - return fd; + return ret; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); From a995e6bc0524450adfd6181dfdcd9d0520cfaba5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:31 +0900 Subject: [PATCH 02/12] tools/bootconfig: Fix to check the write failure correctly Fix to check the write(2) failure including partial write correctly and try to rollback the partial write, because if there is no BOOTCONFIG_MAGIC string, we can not remove it. Link: https://lkml.kernel.org/r/160576521135.320071.3883101436675969998.stgit@devnote2 Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 52eb2bbe8966..a0733cbb3c49 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,6 +337,7 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { + struct stat stat; u32 size, csum; char *buf, *data; int ret, fd; @@ -394,16 +395,26 @@ static int apply_xbc(const char *path, const char *xbc_path) return ret; } /* TODO: Ensure the @path is initramfs/initrd image */ - ret = write(fd, data, size + 8); - if (ret < 0) { - pr_err("Failed to apply a boot config: %d\n", ret); + if (fstat(fd, &stat) < 0) { + pr_err("Failed to get the size of %s\n", path); goto out; } + ret = write(fd, data, size + 8); + if (ret < size + 8) { + if (ret < 0) + ret = -errno; + pr_err("Failed to apply a boot config: %d\n", ret); + if (ret < 0) + goto out; + goto out_rollback; + } /* Write a magic word of the bootconfig */ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - if (ret < 0) { + if (ret < BOOTCONFIG_MAGIC_LEN) { + if (ret < 0) + ret = -errno; pr_err("Failed to apply a boot config magic: %d\n", ret); - goto out; + goto out_rollback; } ret = 0; out: @@ -411,6 +422,17 @@ static int apply_xbc(const char *path, const char *xbc_path) free(data); return ret; + +out_rollback: + /* Map the partial write to -ENOSPC */ + if (ret >= 0) + ret = -ENOSPC; + if (ftruncate(fd, stat.st_size) < 0) { + ret = -errno; + pr_err("Failed to rollback the write error: %d\n", ret); + pr_err("The initrd %s may be corrupted. Recommend to rebuild.\n", path); + } + goto out; } static int usage(void) From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:40 +0900 Subject: [PATCH 03/12] tools/bootconfig: Align the bootconfig applied initrd image size to 4 Align the bootconfig applied initrd image size to 4. To fill the gap, the bootconfig command uses null characters in between the bootconfig data and the footer. This will expands the footer size but don't change the checksum. Thus the block image of the initrd file with bootconfig is as follows. [initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"] Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 ++ tools/bootconfig/main.c | 59 +++++++++++++++++------------ tools/bootconfig/test-bootconfig.sh | 6 ++- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 9903088891fa..2696eb0fc149 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -12,6 +12,9 @@ #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 +#define BOOTCONFIG_ALIGN_SHIFT 2 +#define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) +#define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) /* XBC tree node */ struct xbc_node { diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index a0733cbb3c49..4a445b6304bb 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,12 +337,13 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { + char *buf, *data, *p; + size_t total_size; struct stat stat; - u32 size, csum; - char *buf, *data; - int ret, fd; const char *msg; - int pos; + u32 size, csum; + int pos, pad; + int ret, fd; ret = load_xbc_file(xbc_path, &buf); if (ret < 0) { @@ -352,13 +353,12 @@ static int apply_xbc(const char *path, const char *xbc_path) size = strlen(buf) + 1; csum = checksum((unsigned char *)buf, size); - /* Prepare xbc_path data */ - data = malloc(size + 8); + /* Backup the bootconfig data */ + data = calloc(size + BOOTCONFIG_ALIGN + + sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1); if (!data) return -ENOMEM; - strcpy(data, buf); - *(u32 *)(data + size) = size; - *(u32 *)(data + size + 4) = csum; + memcpy(data, buf, size); /* Check the data format */ ret = xbc_init(buf, &msg, &pos); @@ -399,24 +399,35 @@ static int apply_xbc(const char *path, const char *xbc_path) pr_err("Failed to get the size of %s\n", path); goto out; } - ret = write(fd, data, size + 8); - if (ret < size + 8) { + + /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ + total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN; + pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; + size += pad; + + /* Add a footer */ + p = data + size; + *(u32 *)p = size; + p += sizeof(u32); + + *(u32 *)p = csum; + p += sizeof(u32); + + memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + p += BOOTCONFIG_MAGIC_LEN; + + total_size = p - data; + + ret = write(fd, data, total_size); + if (ret < total_size) { if (ret < 0) ret = -errno; pr_err("Failed to apply a boot config: %d\n", ret); - if (ret < 0) - goto out; - goto out_rollback; - } - /* Write a magic word of the bootconfig */ - ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - if (ret < BOOTCONFIG_MAGIC_LEN) { - if (ret < 0) - ret = -errno; - pr_err("Failed to apply a boot config magic: %d\n", ret); - goto out_rollback; - } - ret = 0; + if (ret >= 0) + goto out_rollback; + } else + ret = 0; + out: close(fd); free(data); diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index d295e406a756..baed891d0ba4 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -9,6 +9,7 @@ else TESTDIR=. fi BOOTCONF=${TESTDIR}/bootconfig +ALIGN=4 INITRD=`mktemp ${TESTDIR}/initrd-XXXX` TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf` @@ -59,7 +60,10 @@ echo "Show command test" xpass $BOOTCONF $INITRD echo "File size check" -xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12) +total_size=$(expr $bconf_size + $initrd_size + 9 + 12 + $ALIGN - 1 ) +total_size=$(expr $total_size / $ALIGN) +total_size=$(expr $total_size \* $ALIGN) +xpass test $new_size -eq $total_size echo "Apply command repeat test" xpass $BOOTCONF -a $TEMPCONF $INITRD From fbc6e1c6e0a4b5ef402f9eb8d00880a5e1d98df3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:49 +0900 Subject: [PATCH 04/12] docs: bootconfig: Update file format on initrd image To align the total file size, add padding null character when appending the bootconfig to initrd image. Link: https://lkml.kernel.org/r/160576522916.320071.4145530996151028855.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index a22024f9175e..363599683784 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -137,15 +137,22 @@ Boot Kernel With a Boot Config ============================== Since the boot configuration file is loaded with initrd, it will be added -to the end of the initrd (initramfs) image file with size, checksum and -12-byte magic word as below. +to the end of the initrd (initramfs) image file with padding, size, +checksum and 12-byte magic word as below. -[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n] +[initrd][bootconfig][padding][size(u32)][checksum(u32)][#BOOTCONFIG\n] + +When the boot configuration is added to the initrd image, the total +file size is aligned to 4 bytes. To fill the gap, null characters +(``\0``) will be added. Thus the ``size`` is the length of the bootconfig +file + padding bytes. The Linux kernel decodes the last part of the initrd image in memory to get the boot configuration data. Because of this "piggyback" method, there is no need to change or -update the boot loader and the kernel image itself. +update the boot loader and the kernel image itself as long as the boot +loader passes the correct initrd file size. If by any chance, the boot +loader passes a longer size, the kernel feils to find the bootconfig data. To do this operation, Linux kernel provides "bootconfig" command under tools/bootconfig, which allows admin to apply or delete the config file @@ -176,7 +183,8 @@ up to 512 key-value pairs. If keys contains 3 words in average, it can contain 256 key-value pairs. In most cases, the number of config items will be under 100 entries and smaller than 8KB, so it would be enough. If the node number exceeds 1024, parser returns an error even if the file -size is smaller than 32KB. +size is smaller than 32KB. (Note that this maximum size is not including +the padding null characters.) Anyway, since bootconfig command verifies it when appending a boot config to initrd image, user can notice it before boot. From 55ea4cf403800af2ce6b125bc3d853117e0c0456 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 27 Nov 2020 11:20:58 -0500 Subject: [PATCH 05/12] ring-buffer: Update write stamp with the correct ts The write stamp, used to calculate deltas between events, was updated with the stale "ts" value in the "info" structure, and not with the updated "ts" variable. This caused the deltas between events to be inaccurate, and when crossing into a new sub buffer, had time go backwards. Link: https://lkml.kernel.org/r/20201124223917.795844-1-elavila@google.com Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Reported-by: "J. Avila" Tested-by: Daniel Mentz Tested-by: Will McVicker Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dc83b3fa9fe7..bccaf88d3706 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3291,7 +3291,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* Nothing came after this event between C and E */ info->delta = ts - info->after; (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, info->ts); + info->after, ts); info->ts = ts; } else { /* From 8785f51a17083eee7c37606079c6447afc6ba102 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 28 Nov 2020 10:15:17 +0100 Subject: [PATCH 06/12] ring-buffer: Set the right timestamp in the slow path of __rb_reserve_next() In the slow path of __rb_reserve_next() a nested event(s) can happen between evaluating the timestamp delta of the current event and updating write_stamp via local_cmpxchg(); in this case the delta is not valid anymore and it should be set to 0 (same timestamp as the interrupting event), since the event that we are currently processing is not the last event in the buffer. Link: https://lkml.kernel.org/r/X8IVJcp1gRE+FJCJ@xps-13-7390 Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Link: https://lwn.net/Articles/831207 Fixes: a389d86f7fd0 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Andrea Righi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bccaf88d3706..35d91b20d47a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3287,11 +3287,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ts = rb_time_stamp(cpu_buffer->buffer); barrier(); /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && - info->after < ts) { + info->after < ts && + rb_time_cmpxchg(&cpu_buffer->write_stamp, + info->after, ts)) { /* Nothing came after this event between C and E */ info->delta = ts - info->after; - (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, ts); info->ts = ts; } else { /* From 983df5f2699f83f78643b19d3399b160d1e64f5b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 13 Nov 2020 10:34:14 -0800 Subject: [PATCH 07/12] samples/ftrace: Mark my_tramp[12]? global my_tramp[12]? are declared as global functions in C, but they are not marked global in the inline assembly definition. This mismatch confuses Clang's Control-Flow Integrity checking. Fix the definitions by adding .globl. Link: https://lkml.kernel.org/r/20201113183414.1446671-1-samitolvanen@google.com Fixes: 9d907f1ae80b8 ("ftrace/samples: Add a sample module that implements modify_ftrace_direct()") Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen Signed-off-by: Steven Rostedt (VMware) --- samples/ftrace/ftrace-direct-modify.c | 2 ++ samples/ftrace/ftrace-direct-too.c | 1 + samples/ftrace/ftrace-direct.c | 1 + 3 files changed, 4 insertions(+) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index c13a5bc5095b..5b9a09957c6e 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -21,6 +21,7 @@ static unsigned long my_ip = (unsigned long)schedule; asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" +" .globl my_tramp1\n" " my_tramp1:" " pushq %rbp\n" " movq %rsp, %rbp\n" @@ -29,6 +30,7 @@ asm ( " .size my_tramp1, .-my_tramp1\n" " ret\n" " .type my_tramp2, @function\n" +" .globl my_tramp2\n" " my_tramp2:" " pushq %rbp\n" " movq %rsp, %rbp\n" diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index d5c5022be664..3f0079c9bd6f 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -16,6 +16,7 @@ extern void my_tramp(void *); asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" +" .globl my_tramp\n" " my_tramp:" " pushq %rbp\n" " movq %rsp, %rbp\n" diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 63ca06d42c80..a2729d1ef17f 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -14,6 +14,7 @@ extern void my_tramp(void *); asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" +" .globl my_tramp\n" " my_tramp:" " pushq %rbp\n" " movq %rsp, %rbp\n" From 310e3a4b5a4fc718a72201c1e4cf5c64ac6f5442 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 18 Nov 2020 15:05:20 +0300 Subject: [PATCH 08/12] tracing: Remove WARN_ON in start_thread() This patch reverts commit 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") .start hook can be legally called several times if according tracer is stopped screen window 1 [root@localhost ~]# echo 1 > /sys/kernel/tracing/events/kmem/kfree/enable [root@localhost ~]# echo 1 > /sys/kernel/tracing/options/pause-on-trace [root@localhost ~]# less -F /sys/kernel/tracing/trace screen window 2 [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo hwlat > /sys/kernel/debug/tracing/current_tracer [root@localhost ~]# echo 1 > /sys/kernel/debug/tracing/tracing_on [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo 2 > /sys/kernel/debug/tracing/tracing_on triggers warning in dmesg: WARNING: CPU: 3 PID: 1403 at kernel/trace/trace_hwlat.c:371 hwlat_tracer_start+0xc9/0xd0 Link: https://lkml.kernel.org/r/bd4d3e70-400d-9c82-7b73-a2d695e86b58@virtuozzo.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c9ad5c6fbaad..d071fc271eef 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -368,7 +368,7 @@ static int start_kthread(struct trace_array *tr) struct task_struct *kthread; int next_cpu; - if (WARN_ON(hwlat_kthread)) + if (hwlat_kthread) return 0; /* Just pick the first CPU on first iteration */ From 8fa655a3a0013a0c2a2aada6f39a93ee6fc25549 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 25 Nov 2020 14:56:54 -0800 Subject: [PATCH 09/12] tracing: Fix alignment of static buffer With 5.9 kernel on ARM64, I found ftrace_dump output was broken but it had no problem with normal output "cat /sys/kernel/debug/tracing/trace". With investigation, it seems coping the data into temporal buffer seems to break the align binary printf expects if the static buffer is not aligned with 4-byte. IIUC, get_arg in bstr_printf expects that args has already right align to be decoded and seq_buf_bprintf says ``the arguments are saved in a 32bit word array that is defined by the format string constraints``. So if we don't keep the align under copy to temporal buffer, the output will be broken by shifting some bytes. This patch fixes it. Link: https://lkml.kernel.org/r/20201125225654.1618966-1-minchan@kernel.org Cc: Fixes: 8e99cf91b99bb ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") Signed-off-by: Namhyung Kim Signed-off-by: Minchan Kim Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 410cfeb16db5..7d53c5bdea3e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3534,7 +3534,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, } #define STATIC_TEMP_BUF_SIZE 128 -static char static_temp_buf[STATIC_TEMP_BUF_SIZE]; +static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); /* Find the next real entry, without updating the iterator itself */ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, From 4c75b0ff4e4bf7a45b5aef9639799719c28d0073 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 26 Nov 2020 23:38:38 +0530 Subject: [PATCH 10/12] ftrace: Fix updating FTRACE_FL_TRAMP On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in ftrace_get_addr_new() followed by the below message: Bad trampoline accounting at: 000000004222522f (wake_up_process+0xc/0x20) (f0000001) The set of steps leading to this involved: - modprobe ftrace-direct-too - enable_probe - modprobe ftrace-direct - rmmod ftrace-direct <-- trigger The problem turned out to be that we were not updating flags in the ftrace record properly. From the above message about the trampoline accounting being bad, it can be seen that the ftrace record still has FTRACE_FL_TRAMP set though ftrace-direct module is going away. This happens because we are checking if any ftrace_ops has the FTRACE_FL_TRAMP flag set _before_ updating the filter hash. The fix for this is to look for any _other_ ftrace_ops that also needs FTRACE_FL_TRAMP. Link: https://lkml.kernel.org/r/56c113aa9c3e10c19144a36d9684c7882bf09af5.1606412433.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: a124692b698b0 ("ftrace: Enable trampoline when rec count returns back to one") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8185f7240095..9c1bba8cc51b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1629,6 +1629,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude); +static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, @@ -1778,7 +1780,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * to it. */ if (ftrace_rec_count(rec) == 1 && - ftrace_find_tramp_ops_any(rec)) + ftrace_find_tramp_ops_any_other(rec, ops)) rec->flags |= FTRACE_FL_TRAMP; else rec->flags &= ~FTRACE_FL_TRAMP; @@ -2244,6 +2246,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) return NULL; } +static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude) +{ + struct ftrace_ops *op; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (op == op_exclude || !op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *op) From 49a962c075dfa41c78e34784772329bc8784d217 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 26 Nov 2020 23:38:39 +0530 Subject: [PATCH 11/12] ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency DYNAMIC_FTRACE_WITH_DIRECT_CALLS should depend on DYNAMIC_FTRACE_WITH_REGS since we need ftrace_regs_caller(). Link: https://lkml.kernel.org/r/fc4b257ea8689a36f086d2389a9ed989496ca63a.1606412433.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: 763e34e74bb7d5c ("ftrace: Add register_ftrace_direct()") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a4020c0b4508..e1bf5228fb69 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -202,7 +202,7 @@ config DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS def_bool y - depends on DYNAMIC_FTRACE + depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS config FUNCTION_PROFILER From 68e10d5ff512b503dcba1246ad5620f32035e135 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Nov 2020 23:16:03 -0500 Subject: [PATCH 12/12] ring-buffer: Always check to put back before stamp when crossing pages The current ring buffer logic checks to see if the updating of the event buffer was interrupted, and if it is, it will try to fix up the before stamp with the write stamp to make them equal again. This logic is flawed, because if it is not interrupted, the two are guaranteed to be different, as the current event just updated the before stamp before allocation. This guarantees that the next event (this one or another interrupting one) will think it interrupted the time updates of a previous event and inject an absolute time stamp to compensate. The correct logic is to always update the timestamps when traversing to a new sub buffer. Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 35d91b20d47a..a6268e09160a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3234,14 +3234,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) { - if (tail != w) { - /* before and after may now different, fix it up*/ - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - if (a_ok && b_ok && info->before != info->after) - (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, - info->before, info->after); - } + /* before and after may now different, fix it up*/ + b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); + a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); + if (a_ok && b_ok && info->before != info->after) + (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, + info->before, info->after); return rb_move_tail(cpu_buffer, tail, info); }