From d052e1c6909f9ccbdc4112a50796afca19094229 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 10 Aug 2020 17:34:41 +0900
Subject: [PATCH 01/58] tools/bootconfig: Show bootconfig compact tree from
 bootconfig file

Show the bootconfig compact tree from the bootconfig file
instead of an initrd if the given file has no magic number
and is smaller than 32KB.

User can use this for checking the syntax error or output
checking before applying the bootconfig to initrd.

Link: https://lkml.kernel.org/r/159704848156.175360.6621139371000789360.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 81 +++++++++++++++++++++++++++++------------
 1 file changed, 58 insertions(+), 23 deletions(-)
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index e0878f5f74b1..d165e63b5d5a 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -195,10 +195,55 @@ int load_xbc_from_initrd(int fd, char **buf)
 	return size;
 }
 
+static void show_xbc_error(const char *data, const char *msg, int pos)
+{
+	int lin = 1, col, i;
+
+	if (pos < 0) {
+		pr_err("Error: %s.\n", msg);
+		return;
+	}
+
+	/* Note that pos starts from 0 but lin and col should start from 1. */
+	col = pos + 1;
+	for (i = 0; i < pos; i++) {
+		if (data[i] == '\n') {
+			lin++;
+			col = pos - i;
+		}
+	}
+	pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
+
+}
+
+static int init_xbc_with_error(char *buf, int len)
+{
+	char *copy = strdup(buf);
+	const char *msg;
+	int ret, pos;
+
+	if (!copy)
+		return -ENOMEM;
+
+	ret = xbc_init(buf, &msg, &pos);
+	if (ret < 0)
+		show_xbc_error(copy, msg, pos);
+	free(copy);
+
+	return ret;
+}
+
 int show_xbc(const char *path)
 {
 	int ret, fd;
 	char *buf = NULL;
+	struct stat st;
+
+	ret = stat(path, &st);
+	if (ret < 0) {
+		pr_err("Failed to stat %s: %d\n", path, -errno);
+		return -errno;
+	}
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
@@ -207,14 +252,24 @@ int show_xbc(const char *path)
 	}
 
 	ret = load_xbc_from_initrd(fd, &buf);
+	close(fd);
 	if (ret < 0) {
 		pr_err("Failed to load a boot config from initrd: %d\n", ret);
 		goto out;
 	}
+	/* Assume a bootconfig file if it is enough small */
+	if (ret == 0 && st.st_size <= XBC_DATA_MAX) {
+		ret = load_xbc_file(path, &buf);
+		if (ret < 0) {
+			pr_err("Failed to load a boot config: %d\n", ret);
+			goto out;
+		}
+		if (init_xbc_with_error(buf, ret) < 0)
+			goto out;
+	}
 	xbc_show_compact_tree();
 	ret = 0;
 out:
-	close(fd);
 	free(buf);
 
 	return ret;
@@ -251,27 +306,6 @@ int delete_xbc(const char *path)
 	return ret;
 }
 
-static void show_xbc_error(const char *data, const char *msg, int pos)
-{
-	int lin = 1, col, i;
-
-	if (pos < 0) {
-		pr_err("Error: %s.\n", msg);
-		return;
-	}
-
-	/* Note that pos starts from 0 but lin and col should start from 1. */
-	col = pos + 1;
-	for (i = 0; i < pos; i++) {
-		if (data[i] == '\n') {
-			lin++;
-			col = pos - i;
-		}
-	}
-	pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
-
-}
-
 int apply_xbc(const char *path, const char *xbc_path)
 {
 	u32 size, csum;
@@ -352,11 +386,12 @@ int apply_xbc(const char *path, const char *xbc_path)
 int usage(void)
 {
 	printf("Usage: bootconfig [OPTIONS] <INITRD>\n"
+		"Or     bootconfig <CONFIG>\n"
 		" Apply, delete or show boot config to initrd.\n"
 		" Options:\n"
 		"		-a <config>: Apply boot config to initrd\n"
 		"		-d : Delete boot config file from initrd\n\n"
-		" If no option is given, show current applied boot config.\n");
+		" If no option is given, show the bootconfig in the given file.\n");
 	return -1;
 }
 

From e4f70b7badb40598ceea31c122d7c2fb6203672a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 10 Aug 2020 17:34:51 +0900
Subject: [PATCH 02/58] tools/bootconfig: Add list option

Add list option (-l) to show the bootconfig in the list style.
This is same output of /proc/bootconfig. So users can check
how their bootconfig will be shown in procfs. This will help
them to write a user-space script to parse the /proc/bootconfig.

Link: https://lkml.kernel.org/r/159704849087.175360.8761890802048625207.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 52 +++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 12 deletions(-)

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index d165e63b5d5a..78025267df20 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -14,18 +14,19 @@
 #include <linux/kernel.h>
 #include <linux/bootconfig.h>
 
-static int xbc_show_value(struct xbc_node *node)
+static int xbc_show_value(struct xbc_node *node, bool semicolon)
 {
-	const char *val;
+	const char *val, *eol;
 	char q;
 	int i = 0;
 
+	eol = semicolon ? ";\n" : "\n";
 	xbc_array_for_each_value(node, val) {
 		if (strchr(val, '"'))
 			q = '\'';
 		else
 			q = '"';
-		printf("%c%s%c%s", q, val, q, node->next ? ", " : ";\n");
+		printf("%c%s%c%s", q, val, q, node->next ? ", " : eol);
 		i++;
 	}
 	return i;
@@ -53,7 +54,7 @@ static void xbc_show_compact_tree(void)
 			continue;
 		} else if (cnode && xbc_node_is_value(cnode)) {
 			printf("%s = ", xbc_node_get_data(node));
-			xbc_show_value(cnode);
+			xbc_show_value(cnode, true);
 		} else {
 			printf("%s;\n", xbc_node_get_data(node));
 		}
@@ -77,6 +78,26 @@ static void xbc_show_compact_tree(void)
 	}
 }
 
+static void xbc_show_list(void)
+{
+	char key[XBC_KEYLEN_MAX];
+	struct xbc_node *leaf;
+	const char *val;
+	int ret = 0;
+
+	xbc_for_each_key_value(leaf, val) {
+		ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+		if (ret < 0)
+			break;
+		printf("%s = ", key);
+		if (!val || val[0] == '\0') {
+			printf("\"\"\n");
+			continue;
+		}
+		xbc_show_value(xbc_node_get_child(leaf), false);
+	}
+}
+
 /* Simple real checksum */
 int checksum(unsigned char *buf, int len)
 {
@@ -233,7 +254,7 @@ static int init_xbc_with_error(char *buf, int len)
 	return ret;
 }
 
-int show_xbc(const char *path)
+int show_xbc(const char *path, bool list)
 {
 	int ret, fd;
 	char *buf = NULL;
@@ -267,7 +288,10 @@ int show_xbc(const char *path)
 		if (init_xbc_with_error(buf, ret) < 0)
 			goto out;
 	}
-	xbc_show_compact_tree();
+	if (list)
+		xbc_show_list();
+	else
+		xbc_show_compact_tree();
 	ret = 0;
 out:
 	free(buf);
@@ -390,7 +414,8 @@ int usage(void)
 		" Apply, delete or show boot config to initrd.\n"
 		" Options:\n"
 		"		-a <config>: Apply boot config to initrd\n"
-		"		-d : Delete boot config file from initrd\n\n"
+		"		-d : Delete boot config file from initrd\n"
+		"		-l : list boot config in initrd or file\n\n"
 		" If no option is given, show the bootconfig in the given file.\n");
 	return -1;
 }
@@ -399,10 +424,10 @@ int main(int argc, char **argv)
 {
 	char *path = NULL;
 	char *apply = NULL;
-	bool delete = false;
+	bool delete = false, list = false;
 	int opt;
 
-	while ((opt = getopt(argc, argv, "hda:")) != -1) {
+	while ((opt = getopt(argc, argv, "hda:l")) != -1) {
 		switch (opt) {
 		case 'd':
 			delete = true;
@@ -410,14 +435,17 @@ int main(int argc, char **argv)
 		case 'a':
 			apply = optarg;
 			break;
+		case 'l':
+			list = true;
+			break;
 		case 'h':
 		default:
 			return usage();
 		}
 	}
 
-	if (apply && delete) {
-		pr_err("Error: You can not specify both -a and -d at once.\n");
+	if ((apply && delete) || (delete && list) || (apply && list)) {
+		pr_err("Error: You can give one of -a, -d or -l at once.\n");
 		return usage();
 	}
 
@@ -433,5 +461,5 @@ int main(int argc, char **argv)
 	else if (delete)
 		return delete_xbc(path);
 
-	return show_xbc(path);
+	return show_xbc(path, list);
 }

From 483ce6708dce7116ef2c83b36a1cfe28a36c4fc9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 10 Aug 2020 17:35:01 +0900
Subject: [PATCH 03/58] tools/bootconfig: Make all functions static

Make all functions static except for main(). This is just a cleanup.

Link: https://lkml.kernel.org/r/159704850135.175360.12465608936326167517.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 78025267df20..eb92027817a7 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -99,7 +99,7 @@ static void xbc_show_list(void)
 }
 
 /* Simple real checksum */
-int checksum(unsigned char *buf, int len)
+static int checksum(unsigned char *buf, int len)
 {
 	int i, sum = 0;
 
@@ -111,7 +111,7 @@ int checksum(unsigned char *buf, int len)
 
 #define PAGE_SIZE	4096
 
-int load_xbc_fd(int fd, char **buf, int size)
+static int load_xbc_fd(int fd, char **buf, int size)
 {
 	int ret;
 
@@ -128,7 +128,7 @@ int load_xbc_fd(int fd, char **buf, int size)
 }
 
 /* Return the read size or -errno */
-int load_xbc_file(const char *path, char **buf)
+static int load_xbc_file(const char *path, char **buf)
 {
 	struct stat stat;
 	int fd, ret;
@@ -147,7 +147,7 @@ int load_xbc_file(const char *path, char **buf)
 	return ret;
 }
 
-int load_xbc_from_initrd(int fd, char **buf)
+static int load_xbc_from_initrd(int fd, char **buf)
 {
 	struct stat stat;
 	int ret;
@@ -254,7 +254,7 @@ static int init_xbc_with_error(char *buf, int len)
 	return ret;
 }
 
-int show_xbc(const char *path, bool list)
+static int show_xbc(const char *path, bool list)
 {
 	int ret, fd;
 	char *buf = NULL;
@@ -299,7 +299,7 @@ int show_xbc(const char *path, bool list)
 	return ret;
 }
 
-int delete_xbc(const char *path)
+static int delete_xbc(const char *path)
 {
 	struct stat stat;
 	int ret = 0, fd, size;
@@ -330,7 +330,7 @@ int delete_xbc(const char *path)
 	return ret;
 }
 
-int apply_xbc(const char *path, const char *xbc_path)
+static int apply_xbc(const char *path, const char *xbc_path)
 {
 	u32 size, csum;
 	char *buf, *data;
@@ -407,7 +407,7 @@ int apply_xbc(const char *path, const char *xbc_path)
 	return ret;
 }
 
-int usage(void)
+static int usage(void)
 {
 	printf("Usage: bootconfig [OPTIONS] <INITRD>\n"
 		"Or     bootconfig <CONFIG>\n"

From 7e66ef0046ccf896674955b819c27c49783a4deb Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 10 Aug 2020 17:35:11 +0900
Subject: [PATCH 04/58] tools/bootconfig: Add a script to generate ftrace
 shell-command from bootconfig

Add a bconf2ftrace.sh under tools/bootconfig/scripts which generates
a shell script to setup boot-time trace from bootconfig file for testing
the bootconfig.

bconf2ftrace.sh will take a bootconfig file (includes boot-time tracing)
and convert it into a shell-script which is almost same as the boot-time
tracer does.
If --apply option is given, it also tries to apply those command to the
running kernel, which requires the root privilege (or sudo).

For example, if you just want to confirm the shell commands, save
the output as below.

 # bconf2ftrace.sh ftrace.bconf > ftrace.sh

Or, you can apply it directly.

 # bconf2ftrace.sh --apply ftrace.bconf

Note that some boot-time tracing parameters under kernel.* are not able
to set via tracefs nor procfs (e.g. tp_printk, traceoff_on_warning.),
so those are ignored.

Link: https://lkml.kernel.org/r/159704851101.175360.15119132351139842345.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 MAINTAINERS                              |   1 +
 tools/bootconfig/scripts/bconf2ftrace.sh | 189 +++++++++++++++++++++++
 tools/bootconfig/scripts/xbc.sh          |  56 +++++++
 3 files changed, 246 insertions(+)
 create mode 100755 tools/bootconfig/scripts/bconf2ftrace.sh
 create mode 100644 tools/bootconfig/scripts/xbc.sh

diff --git a/MAINTAINERS b/MAINTAINERS
index 0d0862b19ce5..5bd82d7da40c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6601,6 +6601,7 @@ F:	fs/proc/bootconfig.c
 F:	include/linux/bootconfig.h
 F:	lib/bootconfig.c
 F:	tools/bootconfig/*
+F:	tools/bootconfig/scripts/*
 
 EXYNOS DP DRIVER
 M:	Jingoo Han <jingoohan1@gmail.com>
diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh
new file mode 100755
index 000000000000..a46e984fb2ff
--- /dev/null
+++ b/tools/bootconfig/scripts/bconf2ftrace.sh
@@ -0,0 +1,189 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+usage() {
+	echo "Ftrace boottime trace test tool"
+	echo "Usage: $0 [--apply] [--debug] BOOTCONFIG-FILE"
+	echo "    --apply: Test actual apply to tracefs (need sudo)"
+	exit 1
+}
+
+[ $# -eq 0 ] && usage
+
+BCONF=
+DEBUG=
+APPLY=
+while [ x"$1" != x ]; do
+	case "$1" in
+	"--debug")
+		DEBUG=$1;;
+	"--apply")
+		APPLY=$1;;
+	*)
+		[ ! -f $1 ] && usage
+		BCONF=$1;;
+	esac
+	shift 1
+done
+
+if [ x"$APPLY" != x ]; then
+	if [ `id -u` -ne 0 ]; then
+		echo "This must be run by root user. Try sudo." 1>&2
+		exec sudo $0 $DEBUG $APPLY $BCONF
+	fi
+fi
+
+run_cmd() { # command
+	echo "$*"
+	if [ x"$APPLY" != x ]; then # apply command
+		eval $*
+	fi
+}
+
+if [ x"$DEBUG" != x ]; then
+	set -x
+fi
+
+TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "`
+if [ -z "$TRACEFS" ]; then
+	if ! grep -wq debugfs /proc/mounts; then
+		echo "Error: No tracefs/debugfs was mounted." 1>&2
+		exit 1
+	fi
+	TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing
+	if [ ! -d $TRACEFS ]; then
+		echo "Error: ftrace is not enabled on this kernel." 1>&2
+		exit 1
+	fi
+fi
+
+. `dirname $0`/xbc.sh
+
+######## main #########
+set -e
+
+xbc_init $BCONF
+
+set_value_of() { # key file
+	if xbc_has_key $1; then
+		val=`xbc_get_val $1 1`
+		run_cmd "echo '$val' >> $2"
+	fi
+}
+
+set_array_of() { # key file
+	if xbc_has_key $1; then
+		xbc_get_val $1 | while read line; do
+			run_cmd "echo '$line' >> $2"
+		done
+	fi
+}
+
+compose_synth() { # event_name branch
+	echo -n "$1 "
+	xbc_get_val $2 | while read field; do echo -n "$field; "; done
+}
+
+setup_event() { # prefix group event [instance]
+	branch=$1.$2.$3
+	if [ "$4" ]; then
+		eventdir="$TRACEFS/instances/$4/events/$2/$3"
+	else
+		eventdir="$TRACEFS/events/$2/$3"
+	fi
+	case $2 in
+	kprobes)
+		xbc_get_val ${branch}.probes | while read line; do
+			run_cmd "echo 'p:kprobes/$3 $line' >> $TRACEFS/kprobe_events"
+		done
+		;;
+	synthetic)
+		run_cmd "echo '`compose_synth $3 ${branch}.fields`' >> $TRACEFS/synthetic_events"
+		;;
+	esac
+
+	set_value_of ${branch}.filter ${eventdir}/filter
+	set_array_of ${branch}.actions ${eventdir}/trigger
+
+	if xbc_has_key ${branch}.enable; then
+		run_cmd "echo 1 > ${eventdir}/enable"
+	fi
+}
+
+setup_events() { # prefix("ftrace" or "ftrace.instance.INSTANCE") [instance]
+	prefix="${1}.event"
+	if xbc_has_branch ${1}.event; then
+		for grpev in `xbc_subkeys ${1}.event 2`; do
+			setup_event $prefix ${grpev%.*} ${grpev#*.} $2
+		done
+	fi
+}
+
+size2kb() { # size[KB|MB]
+	case $1 in
+	*KB)
+		echo ${1%KB};;
+	*MB)
+		expr ${1%MB} \* 1024;;
+	*)
+		expr $1 / 1024 ;;
+	esac
+}
+
+setup_instance() { # [instance]
+	if [ "$1" ]; then
+		instance="ftrace.instance.${1}"
+		instancedir=$TRACEFS/instances/$1
+	else
+		instance="ftrace"
+		instancedir=$TRACEFS
+	fi
+
+	set_array_of ${instance}.options ${instancedir}/trace_options
+	set_value_of ${instance}.trace_clock ${instancedir}/trace_clock
+	set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask
+	set_value_of ${instance}.tracer ${instancedir}/current_tracer
+	set_array_of ${instance}.ftrace.filters \
+		${instancedir}/set_ftrace_filter
+	set_array_of ${instance}.ftrace.notrace \
+		${instancedir}/set_ftrace_notrace
+
+	if xbc_has_key ${instance}.alloc_snapshot; then
+		run_cmd "echo 1 > ${instancedir}/snapshot"
+	fi
+
+	if xbc_has_key ${instance}.buffer_size; then
+		size=`xbc_get_val ${instance}.buffer_size 1`
+		size=`eval size2kb $size`
+		run_cmd "echo $size >> ${instancedir}/buffer_size_kb"
+	fi
+
+	setup_events ${instance} $1
+	set_array_of ${instance}.events ${instancedir}/set_event
+}
+
+# ftrace global configs (kernel.*)
+if xbc_has_key "kernel.dump_on_oops"; then
+	dump_mode=`xbc_get_val "kernel.dump_on_oops" 1`
+	[ "$dump_mode" ] && dump_mode=`eval echo $dump_mode` || dump_mode=1
+	run_cmd "echo \"$dump_mode\" > /proc/sys/kernel/ftrace_dump_on_oops"
+fi
+
+set_value_of kernel.fgraph_max_depth $TRACEFS/max_graph_depth
+set_array_of kernel.fgraph_filters $TRACEFS/set_graph_function
+set_array_of kernel.fgraph_notraces $TRACEFS/set_graph_notrace
+
+# Per-instance/per-event configs
+if ! xbc_has_branch "ftrace" ; then
+	exit 0
+fi
+
+setup_instance # root instance
+
+if xbc_has_branch "ftrace.instance"; then
+	for i in `xbc_subkeys "ftrace.instance" 1`; do
+		run_cmd "mkdir -p $TRACEFS/instances/$i"
+		setup_instance $i
+	done
+fi
+
diff --git a/tools/bootconfig/scripts/xbc.sh b/tools/bootconfig/scripts/xbc.sh
new file mode 100644
index 000000000000..b8c84e654556
--- /dev/null
+++ b/tools/bootconfig/scripts/xbc.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+# bootconfig utility functions
+
+XBC_TMPFILE=
+XBC_BASEDIR=`dirname $0`
+BOOTCONFIG=${BOOTCONFIG:=$XBC_BASEDIR/../bootconfig}
+if [ ! -x "$BOOTCONFIG" ]; then
+	BOOTCONFIG=`which bootconfig`
+	if [ -z "$BOOTCONFIG" ]; then
+		echo "Erorr: bootconfig command is not found" 1>&2
+		exit 1
+	fi
+fi
+
+xbc_cleanup() {
+	if [ "$XBC_TMPFILE" ]; then
+		rm -f "$XBC_TMPFILE"
+	fi
+}
+
+xbc_init() { # bootconfig-file
+	xbc_cleanup
+	XBC_TMPFILE=`mktemp bconf-XXXX`
+	trap xbc_cleanup EXIT TERM
+
+	$BOOTCONFIG -l $1 > $XBC_TMPFILE || exit 1
+}
+
+nr_args() { # args
+	echo $#
+}
+
+xbc_get_val() { # key [maxnum]
+	if [ "$2" ]; then
+		MAXOPT="-L $2"
+	fi
+	grep "^$1 =" $XBC_TMPFILE | cut -d= -f2- | \
+		sed -e 's/", /" /g' -e "s/',/' /g" | \
+		xargs $MAXOPT -n 1 echo
+}
+
+xbc_has_key() { # key
+	grep -q "^$1 =" $XBC_TMPFILE
+}
+
+xbc_has_branch() { # prefix-key
+	grep -q "^$1" $XBC_TMPFILE
+}
+
+xbc_subkeys() { # prefix-key depth
+	__keys=`echo $1 | sed "s/\./ /g"`
+	__s=`nr_args $__keys`
+	grep "^$1" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
+}

From 2b86062a34a81427fca082540e3593b5a6b49a13 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 10 Aug 2020 17:35:21 +0900
Subject: [PATCH 05/58] tools/bootconfig: Add a script to generates bootconfig
 from ftrace

Add a ftrace2bconf.sh under tools/bootconfig/scripts which generates
a bootconfig file from the current ftrace settings.

To read the ftrace settings, ftrace2bconf.sh requires the root
privilege (or sudo). The ftrace2bconf.sh will output the bootconfig
to stdout and error messages to stderr, so usually you'll run it as

 # ftrace2bconf.sh > ftrace.bconf

Note that some ftrace configurations are not supported. For example,
function-call/callgraph trace/notrace settings are not supported because
the wildcard has been expanded and lost in the ftrace anymore.

Link: https://lkml.kernel.org/r/159704852163.175360.16738029520293360558.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/scripts/ftrace2bconf.sh | 244 +++++++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100755 tools/bootconfig/scripts/ftrace2bconf.sh

diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh
new file mode 100755
index 000000000000..6c0d4b61e0c2
--- /dev/null
+++ b/tools/bootconfig/scripts/ftrace2bconf.sh
@@ -0,0 +1,244 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+usage() {
+	echo "Dump boot-time tracing bootconfig from ftrace"
+	echo "Usage: $0 [--debug] [ > BOOTCONFIG-FILE]"
+	exit 1
+}
+
+DEBUG=
+while [ x"$1" != x ]; do
+	case "$1" in
+	"--debug")
+		DEBUG=$1;;
+	-*)
+		usage
+		;;
+	esac
+	shift 1
+done
+
+if [ x"$DEBUG" != x ]; then
+	set -x
+fi
+
+TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "`
+if [ -z "$TRACEFS" ]; then
+	if ! grep -wq debugfs /proc/mounts; then
+		echo "Error: No tracefs/debugfs was mounted."
+		exit 1
+	fi
+	TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing
+	if [ ! -d $TRACEFS ]; then
+		echo "Error: ftrace is not enabled on this kernel." 1>&2
+		exit 1
+	fi
+fi
+
+######## main #########
+
+set -e
+
+emit_kv() { # key =|+= value
+	echo "$@"
+}
+
+global_options() {
+	val=`cat $TRACEFS/max_graph_depth`
+	[ $val != 0 ] && emit_kv kernel.fgraph_max_depth = $val
+	if grep -qv "^#" $TRACEFS/set_graph_function $TRACEFS/set_graph_notrace ; then
+		cat 1>&2 << EOF
+# WARN: kernel.fgraph_filters and kernel.fgraph_notrace are not supported, since the wild card expression was expanded and lost from memory.
+EOF
+	fi
+}
+
+kprobe_event_options() {
+	cat $TRACEFS/kprobe_events | while read p args; do
+		case $p in
+		r*)
+		cat 1>&2 << EOF
+# WARN: A return probe found but it is not supported by bootconfig. Skip it.
+EOF
+		continue;;
+		esac
+		p=${p#*:}
+		event=${p#*/}
+		group=${p%/*}
+		if [ $group != "kprobes" ]; then
+			cat 1>&2 << EOF
+# WARN: kprobes group name $group is changed to "kprobes" for bootconfig.
+EOF
+		fi
+		emit_kv $PREFIX.event.kprobes.$event.probes += $args
+	done
+}
+
+synth_event_options() {
+	cat $TRACEFS/synthetic_events | while read event fields; do
+		emit_kv $PREFIX.event.synthetic.$event.fields = `echo $fields | sed "s/;/,/g"`
+	done
+}
+
+# Variables resolver
+DEFINED_VARS=
+UNRESOLVED_EVENTS=
+
+defined_vars() { # event-dir
+	grep "^hist" $1/trigger | grep -o ':[a-zA-Z0-9]*='
+}
+referred_vars() {
+	grep "^hist" $1/trigger | grep -o '$[a-zA-Z0-9]*'
+}
+
+per_event_options() { # event-dir
+	evdir=$1
+	# Check the special event which has no filter and no trigger
+	[ ! -f $evdir/filter ] && return
+
+	if grep -q "^hist:" $evdir/trigger; then
+		# hist action can refer the undefined variables
+		__vars=`defined_vars $evdir`
+		for v in `referred_vars $evdir`; do
+			if echo $DEFINED_VARS $__vars | grep -vqw ${v#$}; then
+				# $v is not defined yet, defer it
+				UNRESOLVED_EVENTS="$UNRESOLVED_EVENTS $evdir"
+				return;
+			fi
+		done
+		DEFINED_VARS="$DEFINED_VARS "`defined_vars $evdir`
+	fi
+	grep -v "^#" $evdir/trigger | while read action active; do
+		emit_kv $PREFIX.event.$group.$event.actions += \'$action\'
+	done
+
+	# enable is not checked; this is done by set_event in the instance.
+	val=`cat $evdir/filter`
+	if [ "$val" != "none" ]; then
+		emit_kv $PREFIX.event.$group.$event.filter = "$val"
+	fi
+}
+
+retry_unresolved() {
+	unresolved=$UNRESOLVED_EVENTS
+	UNRESOLVED_EVENTS=
+	for evdir in $unresolved; do
+		event=${evdir##*/}
+		group=${evdir%/*}; group=${group##*/}
+		per_event_options $evdir
+	done
+}
+
+event_options() {
+	# PREFIX and INSTANCE must be set
+	if [ $PREFIX = "ftrace" ]; then
+		# define the dynamic events
+		kprobe_event_options
+		synth_event_options
+	fi
+	for group in `ls $INSTANCE/events/` ; do
+		[ ! -d $INSTANCE/events/$group ] && continue
+		for event in `ls $INSTANCE/events/$group/` ;do
+			[ ! -d $INSTANCE/events/$group/$event ] && continue
+			per_event_options $INSTANCE/events/$group/$event
+		done
+	done
+	retry=0
+	while [ $retry -lt 3 ]; do
+		retry_unresolved
+		retry=$((retry + 1))
+	done
+	if [ "$UNRESOLVED_EVENTS" ]; then
+		cat 1>&2 << EOF
+! ERROR: hist triggers in $UNRESOLVED_EVENTS use some undefined variables.
+EOF
+	fi
+}
+
+is_default_trace_option() { # option
+grep -qw $1 << EOF
+print-parent
+nosym-offset
+nosym-addr
+noverbose
+noraw
+nohex
+nobin
+noblock
+trace_printk
+annotate
+nouserstacktrace
+nosym-userobj
+noprintk-msg-only
+context-info
+nolatency-format
+record-cmd
+norecord-tgid
+overwrite
+nodisable_on_free
+irq-info
+markers
+noevent-fork
+nopause-on-trace
+function-trace
+nofunction-fork
+nodisplay-graph
+nostacktrace
+notest_nop_accept
+notest_nop_refuse
+EOF
+}
+
+instance_options() { # [instance-name]
+	if [ $# -eq 0 ]; then
+		PREFIX="ftrace"
+		INSTANCE=$TRACEFS
+	else
+		PREFIX="ftrace.instance.$1"
+		INSTANCE=$TRACEFS/instances/$1
+	fi
+	val=
+	for i in `cat $INSTANCE/trace_options`; do
+		is_default_trace_option $i && continue
+		val="$val, $i"
+	done
+	[ "$val" ] && emit_kv $PREFIX.options = "${val#,}"
+	val="local"
+	for i in `cat $INSTANCE/trace_clock` ; do
+		[ "${i#*]}" ] && continue
+		i=${i%]}; val=${i#[}
+	done
+	[ $val != "local" ] && emit_kv $PREFIX.trace_clock = $val
+	val=`cat $INSTANCE/buffer_size_kb`
+	if echo $val | grep -vq "expanded" ; then
+		emit_kv $PREFIX.buffer_size = $val"KB"
+	fi
+	if grep -q "is allocated" $INSTANCE/snapshot ; then
+		emit_kv $PREFIX.alloc_snapshot
+	fi
+	val=`cat $INSTANCE/tracing_cpumask`
+	if [ `echo $val | sed -e s/f//g`x != x ]; then
+		emit_kv $PREFIX.cpumask = $val
+	fi
+
+	val=
+	for i in `cat $INSTANCE/set_event`; do
+		val="$val, $i"
+	done
+	[ "$val" ] && emit_kv $PREFIX.events = "${val#,}"
+	val=`cat $INSTANCE/current_tracer`
+	[ $val != nop ] && emit_kv $PREFIX.tracer = $val
+	if grep -qv "^#" $INSTANCE/set_ftrace_filter $INSTANCE/set_ftrace_notrace; then
+		cat 1>&2 << EOF
+# WARN: kernel.ftrace.filters and kernel.ftrace.notrace are not supported, since the wild card expression was expanded and lost from memory.
+EOF
+	fi
+	event_options
+}
+
+global_options
+instance_options
+for i in `ls $TRACEFS/instances` ; do
+	instance_options $i
+done

From 5675fd4ef51f0b505a7f802e4d23a37336d521f0 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 10 Aug 2020 17:35:32 +0900
Subject: [PATCH 06/58] tools/bootconfig: Add --init option for bconf2ftrace.sh

Since the ftrace current setting may conflict with the new setting
from bootconfig, add the --init option to initialize ftrace before
setting for bconf2ftrace.sh.

E.g.
 $ bconf2ftrace.sh --init boottrace.bconf

This initialization method copied from selftests/ftrace.

Link: https://lkml.kernel.org/r/159704853203.175360.17029578033994278231.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/scripts/bconf2ftrace.sh |  12 ++-
 tools/bootconfig/scripts/ftrace.sh       | 109 +++++++++++++++++++++++
 2 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 tools/bootconfig/scripts/ftrace.sh

diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh
index a46e984fb2ff..595e164dc352 100755
--- a/tools/bootconfig/scripts/bconf2ftrace.sh
+++ b/tools/bootconfig/scripts/bconf2ftrace.sh
@@ -3,8 +3,9 @@
 
 usage() {
 	echo "Ftrace boottime trace test tool"
-	echo "Usage: $0 [--apply] [--debug] BOOTCONFIG-FILE"
+	echo "Usage: $0 [--apply|--init] [--debug] BOOTCONFIG-FILE"
 	echo "    --apply: Test actual apply to tracefs (need sudo)"
+	echo "    --init:  Initialize ftrace before applying (imply --apply)"
 	exit 1
 }
 
@@ -13,12 +14,16 @@ usage() {
 BCONF=
 DEBUG=
 APPLY=
+INIT=
 while [ x"$1" != x ]; do
 	case "$1" in
 	"--debug")
 		DEBUG=$1;;
 	"--apply")
 		APPLY=$1;;
+	"--init")
+		APPLY=$1
+		INIT=$1;;
 	*)
 		[ ! -f $1 ] && usage
 		BCONF=$1;;
@@ -57,6 +62,11 @@ if [ -z "$TRACEFS" ]; then
 	fi
 fi
 
+if [ x"$INIT" != x ]; then
+	. `dirname $0`/ftrace.sh
+	(cd $TRACEFS; initialize_ftrace)
+fi
+
 . `dirname $0`/xbc.sh
 
 ######## main #########
diff --git a/tools/bootconfig/scripts/ftrace.sh b/tools/bootconfig/scripts/ftrace.sh
new file mode 100644
index 000000000000..186eed923041
--- /dev/null
+++ b/tools/bootconfig/scripts/ftrace.sh
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+clear_trace() { # reset trace output
+    echo > trace
+}
+
+disable_tracing() { # stop trace recording
+    echo 0 > tracing_on
+}
+
+enable_tracing() { # start trace recording
+    echo 1 > tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+    echo nop > current_tracer
+}
+
+reset_trigger_file() {
+    # remove action triggers first
+    grep -H ':on[^:]*(' $@ |
+    while read line; do
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" >> $file
+    done
+    grep -Hv ^# $@ |
+    while read line; do
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" > $file
+    done
+}
+
+reset_trigger() { # reset all current setting triggers
+    if [ -d events/synthetic ]; then
+        reset_trigger_file events/synthetic/*/trigger
+    fi
+    reset_trigger_file events/*/*/trigger
+}
+
+reset_events_filter() { # reset all current setting filters
+    grep -v ^none events/*/*/filter |
+    while read line; do
+	echo 0 > `echo $line | cut -f1 -d:`
+    done
+}
+
+reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+    if [ ! -f set_ftrace_filter ]; then
+      return 0
+    fi
+    echo > set_ftrace_filter
+    grep -v '^#' set_ftrace_filter | while read t; do
+	tr=`echo $t | cut -d: -f2`
+	if [ "$tr" = "" ]; then
+	    continue
+	fi
+	if ! grep -q "$t" set_ftrace_filter; then
+		continue;
+	fi
+	name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
+	if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
+	    tr=`echo $t | cut -d: -f2-4`
+	    limit=`echo $t | cut -d: -f5`
+	else
+	    tr=`echo $t | cut -d: -f2`
+	    limit=`echo $t | cut -d: -f3`
+	fi
+	if [ "$limit" != "unlimited" ]; then
+	    tr="$tr:$limit"
+	fi
+	echo "!$name:$tr" > set_ftrace_filter
+    done
+}
+
+disable_events() {
+    echo 0 > events/enable
+}
+
+clear_synthetic_events() { # reset all current synthetic events
+    grep -v ^# synthetic_events |
+    while read line; do
+        echo "!$line" >> synthetic_events
+    done
+}
+
+initialize_ftrace() { # Reset ftrace to initial-state
+# As the initial state, ftrace will be set to nop tracer,
+# no events, no triggers, no filters, no function filters,
+# no probes, and tracing on.
+    disable_tracing
+    reset_tracer
+    reset_trigger
+    reset_events_filter
+    reset_ftrace_filter
+    disable_events
+    [ -f set_event_pid ] && echo > set_event_pid
+    [ -f set_ftrace_pid ] && echo > set_ftrace_pid
+    [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
+    [ -f set_graph_function ] && echo | tee set_graph_*
+    [ -f stack_trace_filter ] && echo > stack_trace_filter
+    [ -f kprobe_events ] && echo > kprobe_events
+    [ -f uprobe_events ] && echo > uprobe_events
+    [ -f synthetic_events ] && echo > synthetic_events
+    [ -f snapshot ] && echo 0 > snapshot
+    clear_trace
+    enable_tracing
+}

From dc300d77b86a122d3fd099206e1adf699ed80bd7 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@linux.alibaba.com>
Date: Sun, 12 Jul 2020 09:10:35 +0800
Subject: [PATCH 07/58] tracing: toplevel d_entry already initialized

Currently we have following call flow:

    tracer_init_tracefs()
        tracing_init_dentry()
        event_trace_init()
            tracing_init_dentry()

This shows tracing_init_dentry() is called twice in this flow and this
is not necessary.

Let's remove the second one when it is for sure be properly initialized.

Link: https://lkml.kernel.org/r/20200712011036.70948-4-richard.weiyang@linux.alibaba.com

Signed-off-by: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a85effb2373b..ee25d849ebba 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3434,7 +3434,6 @@ early_initcall(event_trace_enable_again);
 __init int event_trace_init(void)
 {
 	struct trace_array *tr;
-	struct dentry *d_tracer;
 	struct dentry *entry;
 	int ret;
 
@@ -3442,11 +3441,7 @@ __init int event_trace_init(void)
 	if (!tr)
 		return -ENODEV;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
-		return 0;
-
-	entry = tracefs_create_file("available_events", 0444, d_tracer,
+	entry = tracefs_create_file("available_events", 0444, NULL,
 				    tr, &ftrace_avail_fops);
 	if (!entry)
 		pr_warn("Could not create tracefs 'available_events' entry\n");
@@ -3457,7 +3452,7 @@ __init int event_trace_init(void)
 	if (trace_define_common_fields())
 		pr_warn("tracing: Failed to allocate common fields");
 
-	ret = early_event_add_tracer(d_tracer, tr);
+	ret = early_event_add_tracer(NULL, tr);
 	if (ret)
 		return ret;
 

From 22c36b18263426bdd97ef5e04c0e92224c612ee1 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@linux.alibaba.com>
Date: Sun, 12 Jul 2020 09:10:36 +0800
Subject: [PATCH 08/58] tracing: make tracing_init_dentry() returns an integer
 instead of a d_entry pointer

Current tracing_init_dentry() return a d_entry pointer, while is not
necessary. This function returns NULL on success or error on failure,
which means there is no valid d_entry pointer return.

Let's return 0 on success and negative value for error.

Link: https://lkml.kernel.org/r/20200712011036.70948-5-richard.weiyang@linux.alibaba.com

Signed-off-by: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c                 | 36 ++++++++++++++--------------
 kernel/trace/trace.h                 |  2 +-
 kernel/trace/trace_dynevent.c        |  8 +++----
 kernel/trace/trace_events_synth.c    |  9 +++----
 kernel/trace/trace_functions_graph.c |  8 +++----
 kernel/trace/trace_hwlat.c           |  8 +++----
 kernel/trace/trace_kprobe.c          | 10 ++++----
 kernel/trace/trace_printk.c          |  8 +++----
 kernel/trace/trace_stack.c           | 12 +++++-----
 kernel/trace/trace_stat.c            |  8 +++----
 kernel/trace/trace_uprobe.c          |  9 ++++---
 11 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a7c26345e83..29a9034b38d0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8971,21 +8971,21 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
  * directory. It is called via fs_initcall() by any of the boot up code
  * and expects to return the dentry of the top level tracing directory.
  */
-struct dentry *tracing_init_dentry(void)
+int tracing_init_dentry(void)
 {
 	struct trace_array *tr = &global_trace;
 
 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
 		pr_warn("Tracing disabled due to lockdown\n");
-		return ERR_PTR(-EPERM);
+		return -EPERM;
 	}
 
 	/* The top level trace array uses  NULL as parent */
 	if (tr->dir)
-		return NULL;
+		return 0;
 
 	if (WARN_ON(!tracefs_initialized()))
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 	/*
 	 * As there may still be users that expect the tracing
@@ -8996,7 +8996,7 @@ struct dentry *tracing_init_dentry(void)
 	tr->dir = debugfs_create_automount("tracing", NULL,
 					   trace_automount, NULL);
 
-	return NULL;
+	return 0;
 }
 
 extern struct trace_eval_map *__start_ftrace_eval_maps[];
@@ -9083,48 +9083,48 @@ static struct notifier_block trace_module_nb = {
 
 static __init int tracer_init_tracefs(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 
 	trace_access_lock_init();
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
 	event_trace_init();
 
-	init_tracer_tracefs(&global_trace, d_tracer);
-	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
+	init_tracer_tracefs(&global_trace, NULL);
+	ftrace_init_tracefs_toplevel(&global_trace, NULL);
 
-	trace_create_file("tracing_thresh", 0644, d_tracer,
+	trace_create_file("tracing_thresh", 0644, NULL,
 			&global_trace, &tracing_thresh_fops);
 
-	trace_create_file("README", 0444, d_tracer,
+	trace_create_file("README", 0444, NULL,
 			NULL, &tracing_readme_fops);
 
-	trace_create_file("saved_cmdlines", 0444, d_tracer,
+	trace_create_file("saved_cmdlines", 0444, NULL,
 			NULL, &tracing_saved_cmdlines_fops);
 
-	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
+	trace_create_file("saved_cmdlines_size", 0644, NULL,
 			  NULL, &tracing_saved_cmdlines_size_fops);
 
-	trace_create_file("saved_tgids", 0444, d_tracer,
+	trace_create_file("saved_tgids", 0444, NULL,
 			NULL, &tracing_saved_tgids_fops);
 
 	trace_eval_init();
 
-	trace_create_eval_file(d_tracer);
+	trace_create_eval_file(NULL);
 
 #ifdef CONFIG_MODULES
 	register_module_notifier(&trace_module_nb);
 #endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
 			NULL, &tracing_dyn_info_fops);
 #endif
 
-	create_trace_instances(d_tracer);
+	create_trace_instances(NULL);
 
 	update_tracer_options(&global_trace);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 610d21355526..0d3a405fe446 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -737,7 +737,7 @@ struct dentry *trace_create_file(const char *name,
 				 void *data,
 				 const struct file_operations *fops);
 
-struct dentry *tracing_init_dentry(void);
+int tracing_init_dentry(void);
 
 struct ring_buffer_event;
 
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 9f2e8520b748..9442a9bb080e 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -206,14 +206,14 @@ static const struct file_operations dynamic_events_ops = {
 /* Make a tracefs interface for controlling dynamic events */
 static __init int init_dynamic_event(void)
 {
-	struct dentry *d_tracer;
 	struct dentry *entry;
+	int ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	entry = tracefs_create_file("dynamic_events", 0644, d_tracer,
+	entry = tracefs_create_file("dynamic_events", 0644, NULL,
 				    NULL, &dynamic_events_ops);
 
 	/* Event list interface */
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index c6cca0d1d584..f86a2aa0bccd 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1757,7 +1757,6 @@ static const struct file_operations synth_events_fops = {
 static __init int trace_events_synth_init(void)
 {
 	struct dentry *entry = NULL;
-	struct dentry *d_tracer;
 	int err = 0;
 
 	err = dyn_event_register(&synth_event_ops);
@@ -1766,13 +1765,11 @@ static __init int trace_events_synth_init(void)
 		return err;
 	}
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer)) {
-		err = PTR_ERR(d_tracer);
+	err = tracing_init_dentry();
+	if (err)
 		goto err;
-	}
 
-	entry = tracefs_create_file("synthetic_events", 0644, d_tracer,
+	entry = tracefs_create_file("synthetic_events", 0644, NULL,
 				    NULL, &synth_events_fops);
 	if (!entry) {
 		err = -ENODEV;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4a9c49c08ec9..60d66278aa0d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1336,13 +1336,13 @@ static const struct file_operations graph_depth_fops = {
 
 static __init int init_graph_tracefs(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	trace_create_file("max_graph_depth", 0644, d_tracer,
+	trace_create_file("max_graph_depth", 0644, NULL,
 			  NULL, &graph_depth_fops);
 
 	return 0;
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 17873e5d0353..c9ad5c6fbaad 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -538,14 +538,14 @@ static const struct file_operations window_fops = {
  */
 static int init_tracefs(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 	struct dentry *top_dir;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return -ENOMEM;
 
-	top_dir = tracefs_create_dir("hwlat_detector", d_tracer);
+	top_dir = tracefs_create_dir("hwlat_detector", NULL);
 	if (!top_dir)
 		return -ENOMEM;
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aefb6065b508..feca9b19cd74 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1901,14 +1901,14 @@ subsys_initcall(init_kprobe_trace_early);
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 	struct dentry *entry;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
+	entry = tracefs_create_file("kprobe_events", 0644, NULL,
 				    NULL, &kprobe_events_ops);
 
 	/* Event list interface */
@@ -1916,7 +1916,7 @@ static __init int init_kprobe_trace(void)
 		pr_warn("Could not create tracefs 'kprobe_events' entry\n");
 
 	/* Profile interface */
-	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
+	entry = tracefs_create_file("kprobe_profile", 0444, NULL,
 				    NULL, &kprobe_profile_ops);
 
 	if (!entry)
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index d4e31e969206..71b2e0fdc3e0 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -367,13 +367,13 @@ static const struct file_operations ftrace_formats_fops = {
 
 static __init int init_trace_printk_function_export(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	trace_create_file("printk_formats", 0444, d_tracer,
+	trace_create_file("printk_formats", 0444, NULL,
 				    NULL, &ftrace_formats_fops);
 
 	return 0;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 98bba4764c52..c408423e5d65 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -554,20 +554,20 @@ __setup("stacktrace", enable_stacktrace);
 
 static __init int stack_trace_init(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	trace_create_file("stack_max_size", 0644, d_tracer,
+	trace_create_file("stack_max_size", 0644, NULL,
 			&stack_trace_max_size, &stack_max_size_fops);
 
-	trace_create_file("stack_trace", 0444, d_tracer,
+	trace_create_file("stack_trace", 0444, NULL,
 			NULL, &stack_trace_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-	trace_create_file("stack_trace_filter", 0644, d_tracer,
+	trace_create_file("stack_trace_filter", 0644, NULL,
 			  &trace_ops, &stack_trace_filter_fops);
 #endif
 
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index d1fa19773cc8..8d141c3825a9 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -276,13 +276,13 @@ static const struct file_operations tracing_stat_fops = {
 
 static int tracing_stat_init(void)
 {
-	struct dentry *d_tracing;
+	int ret;
 
-	d_tracing = tracing_init_dentry();
-	if (IS_ERR(d_tracing))
+	ret = tracing_init_dentry();
+	if (ret)
 		return -ENODEV;
 
-	stat_dir = tracefs_create_dir("trace_stat", d_tracing);
+	stat_dir = tracefs_create_dir("trace_stat", NULL);
 	if (!stat_dir) {
 		pr_warn("Could not create tracefs 'trace_stat' entry\n");
 		return -ENOMEM;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index f4286c9bdeb4..56729c6b6614 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1625,21 +1625,20 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call)
 /* Make a trace interface for controling probe points */
 static __init int init_uprobe_trace(void)
 {
-	struct dentry *d_tracer;
 	int ret;
 
 	ret = dyn_event_register(&trace_uprobe_ops);
 	if (ret)
 		return ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	trace_create_file("uprobe_events", 0644, d_tracer,
+	trace_create_file("uprobe_events", 0644, NULL,
 				    NULL, &uprobe_events_ops);
 	/* Profile interface */
-	trace_create_file("uprobe_profile", 0444, d_tracer,
+	trace_create_file("uprobe_profile", 0444, NULL,
 				    NULL, &uprobe_profile_ops);
 	return 0;
 }

From 5c8c206e4308ee33dea7c60b0cfcbed48a6438b4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 6 Aug 2020 20:32:59 -0700
Subject: [PATCH 09/58] tracing: Delete repeated words in comments

Drop repeated words in kernel/trace/.
{and, the, not}

Link: https://lkml.kernel.org/r/20200807033259.13778-1-rdunlap@infradead.org

Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c             | 2 +-
 kernel/trace/trace.c              | 2 +-
 kernel/trace/trace_dynevent.c     | 2 +-
 kernel/trace/trace_events_synth.c | 2 +-
 kernel/trace/tracing_map.c        | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 603255f5f085..84f32dbc7be8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2402,7 +2402,7 @@ struct ftrace_ops direct_ops = {
  *
  * If the record has the FTRACE_FL_REGS set, that means that it
  * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
- * is not not set, then it wants to convert to the normal callback.
+ * is not set, then it wants to convert to the normal callback.
  *
  * Returns the address of the trampoline to set to
  */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 29a9034b38d0..8fac7d6db222 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9287,7 +9287,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	}
 
 	/*
-	 * We need to stop all tracing on all CPUS to read the
+	 * We need to stop all tracing on all CPUS to read
 	 * the next buffer. This is a bit expensive, but is
 	 * not done often. We fill all what we can read,
 	 * and then release the locks again.
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 9442a9bb080e..5fa49cfd2bb6 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -402,7 +402,7 @@ void dynevent_arg_init(struct dynevent_arg *arg,
  * whitespace, all followed by a separator, if applicable.  After the
  * first arg string is successfully appended to the command string,
  * the optional @operator is appended, followed by the second arg and
- * and optional @separator.  If no separator was specified when
+ * optional @separator.  If no separator was specified when
  * initializing the arg, a space will be appended.
  */
 void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair,
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index f86a2aa0bccd..7c765e80e974 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1211,7 +1211,7 @@ __synth_event_trace_start(struct trace_event_file *file,
 	 * ENABLED bit is set (which attaches the probe thus allowing
 	 * this code to be called, etc).  Because this is called
 	 * directly by the user, we don't have that but we still need
-	 * to honor not logging when disabled.  For the the iterated
+	 * to honor not logging when disabled.  For the iterated
 	 * trace case, we save the enabed state upon start and just
 	 * ignore the following data calls.
 	 */
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 74738c9856f1..4b50fc0cb12c 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -260,7 +260,7 @@ int tracing_map_add_var(struct tracing_map *map)
  * to use cmp_fn.
  *
  * A key can be a subset of a compound key; for that purpose, the
- * offset param is used to describe where within the the compound key
+ * offset param is used to describe where within the compound key
  * the key referenced by this key field resides.
  *
  * Return: The index identifying the field in the map and associated

From b427e765bdffcc18911ace199a17b09332a47d55 Mon Sep 17 00:00:00 2001
From: Xianting Tian <tian.xianting@h3c.com>
Date: Thu, 13 Aug 2020 19:28:03 +0800
Subject: [PATCH 10/58] tracing: Use __this_cpu_read() in
 trace_buffered_event_enable()

The code is executed with preemption disabled, so it's
safe to use __this_cpu_read().

Link: https://lkml.kernel.org/r/20200813112803.12256-1-tian.xianting@h3c.com

Signed-off-by: Xianting Tian <tian.xianting@h3c.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8fac7d6db222..1c4ca25944ba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2511,7 +2511,7 @@ void trace_buffered_event_enable(void)
 
 		preempt_disable();
 		if (cpu == smp_processor_id() &&
-		    this_cpu_read(trace_buffered_event) !=
+		    __this_cpu_read(trace_buffered_event) !=
 		    per_cpu(trace_buffered_event, cpu))
 			WARN_ON_ONCE(1);
 		preempt_enable();

From f3d36426618ee2b2d1fa99aefb5fe4d2dc33807e Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Tue, 18 Aug 2020 08:08:57 +0300
Subject: [PATCH 11/58] kprobes: Use module_name() macro

It is advised to use module_name() macro instead of dereferencing mod->name
directly. This makes sense for consistencys sake and also it prevents a
hard dependency to CONFIG_MODULES.

Link: https://lkml.kernel.org/r/20200818050857.117998-1-jarkko.sakkinen@linux.intel.com

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-mm@kvack.org
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>,
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index feca9b19cd74..f8e46929ceba 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -106,9 +106,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
 static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
 						 struct module *mod)
 {
-	int len = strlen(mod->name);
+	int len = strlen(module_name(mod));
 	const char *name = trace_kprobe_symbol(tk);
-	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
+
+	return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
 }
 
 static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
@@ -688,7 +689,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
 			if (ret)
 				pr_warn("Failed to re-register probe %s on %s: %d\n",
 					trace_probe_name(&tk->tp),
-					mod->name, ret);
+					module_name(mod), ret);
 		}
 	}
 	mutex_unlock(&event_mutex);

From eb8d8b4c9848b200586aa98e105b39f159656ba6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 5 Sep 2020 15:50:20 +0300
Subject: [PATCH 12/58] tracing: remove a pointless assignment

The "tr" is a stack variable so setting it to NULL before a return is
a no-op.  Delete the assignment.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1c4ca25944ba..55b829863127 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8799,7 +8799,6 @@ static int __remove_instance(struct trace_array *tr)
 	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
-	tr = NULL;
 
 	return 0;
 }

From 40d14da383670db21a09e63d52db8dee9b77741e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Sun, 6 Sep 2020 18:33:26 -0700
Subject: [PATCH 13/58] fgraph: Convert ret_stack tasklist scanning to rcu

It seems that alloc_retstack_tasklist() can also take a lockless
approach for scanning the tasklist, instead of using the big global
tasklist_lock. For this we also kill another deprecated and rcu-unsafe
tsk->thread_group user replacing it with for_each_process_thread(),
maintaining semantics.

Here tasklist_lock does not protect anything other than the list
against concurrent fork/exit. And considering that the whole thing
is capped by FTRACE_RETSTACK_ALLOC_SIZE (32), it should not be a
problem to have a pontentially stale, yet stable, list. The task cannot
go away either, so we don't risk racing with ftrace_graph_exit_task()
which clears the retstack.

The tsk->ret_stack management is not protected by tasklist_lock, being
serialized with the corresponding publish/subscribe barriers against
concurrent ftrace_push_return_trace(). In addition this plays nicer
with cachelines by avoiding two atomic ops in the uncontended case.

Link: https://lkml.kernel.org/r/20200907013326.9870-1-dave@stgolabs.net

Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/fgraph.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 1af321dec0f1..5658f13037b3 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -387,8 +387,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 		}
 	}
 
-	read_lock(&tasklist_lock);
-	do_each_thread(g, t) {
+	rcu_read_lock();
+	for_each_process_thread(g, t) {
 		if (start == end) {
 			ret = -EAGAIN;
 			goto unlock;
@@ -403,10 +403,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 			smp_wmb();
 			t->ret_stack = ret_stack_list[start++];
 		}
-	} while_each_thread(g, t);
+	}
 
 unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 free:
 	for (i = start; i < end; i++)
 		kfree(ret_stack_list[i]);

From 8490db06f914100fc8a5110481cbd37d8968be90 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 17:55:15 +0900
Subject: [PATCH 14/58] tracing/boot: Add per-instance tracing_on option
 support

Add per-instance tracing_on option, which will be useful with
traceon/traceoff event trigger actions. For example, if we
disable tracing_on by default and set traceon and traceoff on
a pair of events, we can trace functions between the pair of
events.

Link: https://lkml.kernel.org/r/159972811538.428528.2561315102284268611.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_boot.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index fa0fc08c6ef8..d52d441a17e8 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -40,6 +40,16 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node)
 			pr_err("Failed to set option: %s\n", buf);
 	}
 
+	p = xbc_node_find_value(node, "tracing_on", NULL);
+	if (p && *p != '\0') {
+		if (kstrtoul(p, 10, &v))
+			pr_err("Failed to set tracing on: %s\n", p);
+		if (v)
+			tracer_tracing_on(tr);
+		else
+			tracer_tracing_off(tr);
+	}
+
 	p = xbc_node_find_value(node, "trace_clock", NULL);
 	if (p && *p != '\0') {
 		if (tracing_set_clock(tr, p) < 0)

From c51ba517989efb4be3d8658f31c98786d88a89fe Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 11 Sep 2020 09:47:15 +0900
Subject: [PATCH 15/58] Documentation: tracing: Add tracing_on option to
 boot-time tracer

Add tracing_on option description to the boot-time tracer.

Link: https://lkml.kernel.org/r/159978523520.485820.9250337223076929279.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/boottime-trace.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/trace/boottime-trace.rst b/Documentation/trace/boottime-trace.rst
index dcb390075ca1..9bc8aceb8c0a 100644
--- a/Documentation/trace/boottime-trace.rst
+++ b/Documentation/trace/boottime-trace.rst
@@ -61,6 +61,10 @@ These options can be used for each instance including global ftrace node.
 ftrace.[instance.INSTANCE.]options = OPT1[, OPT2[...]]
    Enable given ftrace options.
 
+ftrace.[instance.INSTANCE.]tracing_on = 0|1
+   Enable/Disable tracing on this instance when starting boot-time tracing.
+   (you can enable it by the "traceon" event trigger action)
+
 ftrace.[instance.INSTANCE.]trace_clock = CLOCK
    Set given CLOCK to ftrace's trace_clock.
 

From 4725cd89978c26405a20414f3a0fa6cbd2bf9aad Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 17:55:35 +0900
Subject: [PATCH 16/58] tracing/kprobes: Support perf-style return probe

Support perf-style return probe ("SYMBOL%return") for kprobe events.
This will allow boot-time tracing user to define a return probe event.

Link: https://lkml.kernel.org/r/159972813535.428528.4437029657208468954.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c        |  2 +-
 kernel/trace/trace_kprobe.c | 18 +++++++++++++++++-
 kernel/trace/trace_probe.h  |  1 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 55b829863127..ca6da462326d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5122,7 +5122,7 @@ static const char readme_msg[] =
 	"\t           -:[<group>/]<event>\n"
 #ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
-  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f8e46929ceba..9d46415296eb 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -718,6 +718,9 @@ static int trace_kprobe_create(int argc, const char *argv[])
 	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
 	 *  - Add kretprobe:
 	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
+	 *    Or
+	 *      p:[GRP/]EVENT] [MOD:]KSYM[+0]%return [FETCHARGS]
+	 *
 	 * Fetch args:
 	 *  $retval	: fetch return value
 	 *  $stack	: fetch stack address
@@ -747,7 +750,6 @@ static int trace_kprobe_create(int argc, const char *argv[])
 	switch (argv[0][0]) {
 	case 'r':
 		is_return = true;
-		flags |= TPARG_FL_RETURN;
 		break;
 	case 'p':
 		break;
@@ -805,12 +807,26 @@ static int trace_kprobe_create(int argc, const char *argv[])
 		symbol = kstrdup(argv[1], GFP_KERNEL);
 		if (!symbol)
 			return -ENOMEM;
+
+		tmp = strchr(symbol, '%');
+		if (tmp) {
+			if (!strcmp(tmp, "%return")) {
+				*tmp = '\0';
+				is_return = true;
+			} else {
+				trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX);
+				goto parse_error;
+			}
+		}
+
 		/* TODO: support .init module functions */
 		ret = traceprobe_split_symbol_offset(symbol, &offset);
 		if (ret || offset < 0 || offset > UINT_MAX) {
 			trace_probe_log_err(0, BAD_PROBE_ADDR);
 			goto parse_error;
 		}
+		if (is_return)
+			flags |= TPARG_FL_RETURN;
 		if (kprobe_on_func_entry(NULL, symbol, offset))
 			flags |= TPARG_FL_FENTRY;
 		if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index a22b62813f8c..04d00987da69 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -404,6 +404,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
 	C(MAXACT_TOO_BIG,	"Maxactive is too big"),		\
 	C(BAD_PROBE_ADDR,	"Invalid probed address or symbol"),	\
 	C(BAD_RETPROBE,		"Retprobe address must be an function entry"), \
+	C(BAD_ADDR_SUFFIX,	"Invalid probed address suffix"), \
 	C(NO_GROUP_NAME,	"Group name is not specified"),		\
 	C(GROUP_TOO_LONG,	"Group name is too long"),		\
 	C(BAD_GROUP_NAME,	"Group name must follow the same rules as C identifiers"), \

From 3dd3aae32dc91efab916b28cf95986186c6e8d6b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 17:55:46 +0900
Subject: [PATCH 17/58] tracing/uprobes: Support perf-style return probe

Support perf-style return probe ("SYMBOL%return") for uprobe events
as same as kprobe events does.

Link: https://lkml.kernel.org/r/159972814601.428528.7641183316212425445.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c        |  2 +-
 kernel/trace/trace_uprobe.c | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ca6da462326d..c35fcd2f2529 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5125,7 +5125,7 @@ static const char readme_msg[] =
   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
-  "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
+  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
 #endif
 	"\t     args: <name>=fetcharg[:type]\n"
 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 56729c6b6614..3cf7128e1ad3 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -528,7 +528,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
 
 /*
  * Argument syntax:
- *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
+ *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS]
  */
 static int trace_uprobe_create(int argc, const char **argv)
 {
@@ -617,6 +617,19 @@ static int trace_uprobe_create(int argc, const char **argv)
 		}
 	}
 
+	/* Check if there is %return suffix */
+	tmp = strchr(arg, '%');
+	if (tmp) {
+		if (!strcmp(tmp, "%return")) {
+			*tmp = '\0';
+			is_return = true;
+		} else {
+			trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX);
+			ret = -EINVAL;
+			goto fail_address_parse;
+		}
+	}
+
 	/* Parse uprobe offset. */
 	ret = kstrtoul(arg, 0, &offset);
 	if (ret) {

From 638e476d1df6812fad0c700ee3e17727a4ff0d40 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 17:55:56 +0900
Subject: [PATCH 18/58] Documentation: tracing: Add %return suffix description

Add a description of the %return suffix option for kprobe event and
uprobe event.

Link: https://lkml.kernel.org/r/159972815624.428528.10450874184415697524.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/kprobetrace.rst  | 2 ++
 Documentation/trace/uprobetracer.rst | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index c1709165c553..a18eb97a263c 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -30,6 +30,7 @@ Synopsis of kprobe_events
 
   p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]	: Set a probe
   r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]	: Set a return probe
+  p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS]	: Set a return probe
   -:[GRP/]EVENT						: Clear a probe
 
  GRP		: Group name. If omitted, use "kprobes" for it.
@@ -37,6 +38,7 @@ Synopsis of kprobe_events
 		  based on SYM+offs or MEMADDR.
  MOD		: Module name which has given SYM.
  SYM[+offs]	: Symbol+offset where the probe is inserted.
+ SYM%return	: Return address of the symbol
  MEMADDR	: Address where the probe is inserted.
  MAXACTIVE	: Maximum number of instances of the specified function that
 		  can be probed simultaneously, or 0 for the default value
diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst
index 98cde99939d7..a8e5938f609e 100644
--- a/Documentation/trace/uprobetracer.rst
+++ b/Documentation/trace/uprobetracer.rst
@@ -28,6 +28,7 @@ Synopsis of uprobe_tracer
 
   p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe
   r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe)
+  p[:[GRP/]EVENT] PATH:OFFSET%return [FETCHARGS] : Set a return uprobe (uretprobe)
   -:[GRP/]EVENT                           : Clear uprobe or uretprobe event
 
   GRP           : Group name. If omitted, "uprobes" is the default value.
@@ -35,6 +36,7 @@ Synopsis of uprobe_tracer
                   on PATH+OFFSET.
   PATH          : Path to an executable or a library.
   OFFSET        : Offset where the probe is inserted.
+  OFFSET%return : Offset where the return probe is inserted.
 
   FETCHARGS     : Arguments. Each probe can have up to 128 args.
    %REG         : Fetch register REG

From 19c311ae7c0f4f2475617a8acb7ba7b7b4575f0e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 17:56:06 +0900
Subject: [PATCH 19/58] Documentation: tracing: boot: Add an example of tracing
 function-calls

Add an example of tracing function calls on a specific function.

Link: https://lkml.kernel.org/r/159972816669.428528.12390560334549382316.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/boottime-trace.rst | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Documentation/trace/boottime-trace.rst b/Documentation/trace/boottime-trace.rst
index 9bc8aceb8c0a..ab3bfd67197c 100644
--- a/Documentation/trace/boottime-trace.rst
+++ b/Documentation/trace/boottime-trace.rst
@@ -168,6 +168,26 @@ is for tracing functions starting with "user\_", and others tracing
 The instance node also accepts event nodes so that each instance
 can customize its event tracing.
 
+With the trigger action and kprobes, you can trace function-graph while
+a function is called. For example, this will trace all function calls in
+the pci_proc_init()::
+
+  ftrace {
+        tracing_on = 0
+        tracer = function_graph
+        event.kprobes {
+                start_event {
+                        probes = "pci_proc_init"
+                        actions = "traceon"
+                }
+                end_event {
+                        probes = "pci_proc_init%return"
+                        actions = "traceoff"
+                }
+        }
+  }
+
+
 This boot-time tracing also supports ftrace kernel parameters via boot
 config.
 For example, following kernel parameters::

From 8f2a59968f961570fe7b9d99e3a615dd21477638 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 17:56:16 +0900
Subject: [PATCH 20/58] selftests/ftrace: Add %return suffix tests

Add kprobe %return suffix testcase and syntax error tests
for %return suffix.

Link: https://lkml.kernel.org/r/159972817653.428528.9180599115849301184.stgit@devnote2

Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../test.d/kprobe/kprobe_syntax_errors.tc     |  6 ++++++
 .../test.d/kprobe/kretprobe_return_suffix.tc  | 21 +++++++++++++++++++
 .../test.d/kprobe/uprobe_syntax_errors.tc     |  6 ++++++
 3 files changed, 33 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index b4d834675e59..56b3f36c722b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -97,4 +97,10 @@ check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"'	# DIFF_ARG_TYPE
 check_error '^p:kprobes/testevent _do_fork abcd=\1'	# SAME_PROBE
 fi
 
+# %return suffix errors
+if grep -q "place (kretprobe): .*%return.*" README; then
+check_error 'p vfs_read^%hoge'		# BAD_ADDR_SUFFIX
+check_error 'p ^vfs_read+10%return'	# BAD_RETPROBE
+fi
+
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
new file mode 100644
index 000000000000..f07bd15cc033
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe %%return suffix test
+# requires: kprobe_events '<symbol>[+<offset>]%return':README
+
+# Test for kretprobe by "r"
+echo 'r:myprobeaccept vfs_read' > kprobe_events
+RESULT1=`cat kprobe_events`
+
+# Test for kretprobe by "%return"
+echo 'p:myprobeaccept vfs_read%return' > kprobe_events
+RESULT2=`cat kprobe_events`
+
+if [ "$RESULT1" != "$RESULT2" ]; then
+	echo "Error: %return suffix didn't make a return probe."
+	echo "r-command: $RESULT1"
+	echo "%return: $RESULT2"
+	exit_fail
+fi
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index 7b5b60c3c5a2..f5e3f9e4a01f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -17,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a'	# BAD_REFCNT_SUFFIX
 check_error 'p /bin/sh:10 ^@+ab'	# BAD_FILE_OFFS
 check_error 'p /bin/sh:10 ^@symbol'	# SYM_ON_UPROBE
 
+# %return suffix error
+if grep -q "place (uprobe): .*%return.*" README; then
+check_error 'p /bin/sh:10^%hoge'	# BAD_ADDR_SUFFIX
+check_error 'p /bin/sh:10(10)^%return'	# BAD_REFCNT_SUFFIX
+fi
+
 exit 0

From 36dadef23fcca55bb6531dc12822d3b165319ccc Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 21:38:39 +0900
Subject: [PATCH 21/58] kprobes: Init kprobes in early_initcall

Init kprobes feature in early_initcall as same as jump_label and
dynamic_debug does, so that we can use kprobes events in earlier
boot stage.

Link: https://lkml.kernel.org/r/159974151897.478751.8342374158615496628.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d750e025d1ff..9303881aac84 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2529,7 +2529,7 @@ static int __init init_kprobes(void)
 		init_test_probes();
 	return err;
 }
-subsys_initcall(init_kprobes);
+early_initcall(init_kprobes);
 
 #ifdef CONFIG_DEBUG_FS
 static void report_probe(struct seq_file *pi, struct kprobe *p,

From ac343da7bc9048629f9d12d98e8f0573df88836b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 21:38:48 +0900
Subject: [PATCH 22/58] tracing: Define event fields early stage

Define event fields at early stage so that boot-time tracing can
access the event fields (like per-event filter setting).

Link: https://lkml.kernel.org/r/159974152862.478751.2023768466808361350.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 92 ++++++++++++++++++++++---------------
 1 file changed, 55 insertions(+), 37 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ee25d849ebba..8e87fbab6930 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2123,12 +2123,48 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
 	return NULL;
 }
 
+static int
+event_define_fields(struct trace_event_call *call)
+{
+	struct list_head *head;
+	int ret = 0;
+
+	/*
+	 * Other events may have the same class. Only update
+	 * the fields if they are not already defined.
+	 */
+	head = trace_get_fields(call);
+	if (list_empty(head)) {
+		struct trace_event_fields *field = call->class->fields_array;
+		unsigned int offset = sizeof(struct trace_entry);
+
+		for (; field->type; field++) {
+			if (field->type == TRACE_FUNCTION_TYPE) {
+				field->define_fields(call);
+				break;
+			}
+
+			offset = ALIGN(offset, field->align);
+			ret = trace_define_field(call, field->type, field->name,
+						 offset, field->size,
+						 field->is_signed, field->filter_type);
+			if (WARN_ON_ONCE(ret)) {
+				pr_err("error code is %d\n", ret);
+				break;
+			}
+
+			offset += field->size;
+		}
+	}
+
+	return ret;
+}
+
 static int
 event_create_dir(struct dentry *parent, struct trace_event_file *file)
 {
 	struct trace_event_call *call = file->event_call;
 	struct trace_array *tr = file->tr;
-	struct list_head *head;
 	struct dentry *d_events;
 	const char *name;
 	int ret;
@@ -2162,35 +2198,10 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
 				  &ftrace_event_id_fops);
 #endif
 
-	/*
-	 * Other events may have the same class. Only update
-	 * the fields if they are not already defined.
-	 */
-	head = trace_get_fields(call);
-	if (list_empty(head)) {
-		struct trace_event_fields *field = call->class->fields_array;
-		unsigned int offset = sizeof(struct trace_entry);
-
-		for (; field->type; field++) {
-			if (field->type == TRACE_FUNCTION_TYPE) {
-				ret = field->define_fields(call);
-				break;
-			}
-
-			offset = ALIGN(offset, field->align);
-			ret = trace_define_field(call, field->type, field->name,
-						 offset, field->size,
-						 field->is_signed, field->filter_type);
-			if (ret)
-				break;
-
-			offset += field->size;
-		}
-		if (ret < 0) {
-			pr_warn("Could not initialize trace point events/%s\n",
-				name);
-			return -1;
-		}
+	ret = event_define_fields(call);
+	if (ret < 0) {
+		pr_warn("Could not initialize trace point events/%s\n", name);
+		return ret;
 	}
 
 	/*
@@ -2493,7 +2504,7 @@ __trace_early_add_new_event(struct trace_event_call *call,
 	if (!file)
 		return -ENOMEM;
 
-	return 0;
+	return event_define_fields(call);
 }
 
 struct ftrace_module_file_ops;
@@ -3431,6 +3442,18 @@ static __init int event_trace_enable_again(void)
 
 early_initcall(event_trace_enable_again);
 
+/* Init fields which doesn't related to the tracefs */
+static __init int event_trace_init_fields(void)
+{
+	if (trace_define_generic_fields())
+		pr_warn("tracing: Failed to allocated generic fields");
+
+	if (trace_define_common_fields())
+		pr_warn("tracing: Failed to allocate common fields");
+
+	return 0;
+}
+
 __init int event_trace_init(void)
 {
 	struct trace_array *tr;
@@ -3446,12 +3469,6 @@ __init int event_trace_init(void)
 	if (!entry)
 		pr_warn("Could not create tracefs 'available_events' entry\n");
 
-	if (trace_define_generic_fields())
-		pr_warn("tracing: Failed to allocated generic fields");
-
-	if (trace_define_common_fields())
-		pr_warn("tracing: Failed to allocate common fields");
-
 	ret = early_event_add_tracer(NULL, tr);
 	if (ret)
 		return ret;
@@ -3469,6 +3486,7 @@ void __init trace_event_init(void)
 	event_trace_memsetup();
 	init_ftrace_syscalls();
 	event_trace_enable();
+	event_trace_init_fields();
 }
 
 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST

From a838deab4e635994476bfc5b254bdf461e168752 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 21:38:58 +0900
Subject: [PATCH 23/58] tracing: Enable adding dynamic events early stage

Split the event fields initialization from creating new event directory.
This allows the boot-time tracing to define dynamic events before
initializing events directory on tracefs.

Link: https://lkml.kernel.org/r/159974153790.478751.3475515065034825374.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8e87fbab6930..42c0e7df6e70 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -38,6 +38,7 @@ DEFINE_MUTEX(event_mutex);
 LIST_HEAD(ftrace_events);
 static LIST_HEAD(ftrace_generic_fields);
 static LIST_HEAD(ftrace_common_fields);
+static bool eventdir_initialized;
 
 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
 
@@ -2486,7 +2487,10 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
 	if (!file)
 		return -ENOMEM;
 
-	return event_create_dir(tr->event_dir, file);
+	if (eventdir_initialized)
+		return event_create_dir(tr->event_dir, file);
+	else
+		return event_define_fields(call);
 }
 
 /*
@@ -3478,6 +3482,9 @@ __init int event_trace_init(void)
 	if (ret)
 		pr_warn("Failed to register trace events module notifier\n");
 #endif
+
+	eventdir_initialized = true;
+
 	return 0;
 }
 

From 4114fbfd02f12d7a58cc4bd6fc36e0925266f9f7 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 21:39:07 +0900
Subject: [PATCH 24/58] tracing: Enable creating new instance early boot

Enable creating new trace_array instance in early boot stage.
If the instances directory is not created, postpone it until
the tracefs is initialized.

Link: https://lkml.kernel.org/r/159974154763.478751.6289753509587233103.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c           | 53 +++++++++++++++++++++++++++-------
 kernel/trace/trace.h           |  7 +++++
 kernel/trace/trace_functions.c | 22 +++++++++-----
 3 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c35fcd2f2529..6211a13b3327 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8636,6 +8636,24 @@ struct trace_array *trace_array_find_get(const char *instance)
 	return tr;
 }
 
+static int trace_array_create_dir(struct trace_array *tr)
+{
+	int ret;
+
+	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
+	if (!tr->dir)
+		return -EINVAL;
+
+	ret = event_trace_add_tracer(tr->dir, tr);
+	if (ret)
+		tracefs_remove(tr->dir);
+
+	init_tracer_tracefs(tr, tr->dir);
+	__update_tracer_options(tr);
+
+	return ret;
+}
+
 static struct trace_array *trace_array_create(const char *name)
 {
 	struct trace_array *tr;
@@ -8671,30 +8689,27 @@ static struct trace_array *trace_array_create(const char *name)
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
 
-	tr->dir = tracefs_create_dir(name, trace_instance_dir);
-	if (!tr->dir)
+	if (ftrace_allocate_ftrace_ops(tr) < 0)
 		goto out_free_tr;
 
-	ret = event_trace_add_tracer(tr->dir, tr);
-	if (ret) {
-		tracefs_remove(tr->dir);
-		goto out_free_tr;
-	}
-
 	ftrace_init_trace_array(tr);
 
-	init_tracer_tracefs(tr, tr->dir);
 	init_trace_flags_index(tr);
-	__update_tracer_options(tr);
+
+	if (trace_instance_dir) {
+		ret = trace_array_create_dir(tr);
+		if (ret)
+			goto out_free_tr;
+	}
 
 	list_add(&tr->list, &ftrace_trace_arrays);
 
 	tr->ref++;
 
-
 	return tr;
 
  out_free_tr:
+	ftrace_free_ftrace_ops(tr);
 	free_trace_buffers(tr);
 	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
@@ -8852,11 +8867,27 @@ static int instance_rmdir(const char *name)
 
 static __init void create_trace_instances(struct dentry *d_tracer)
 {
+	struct trace_array *tr;
+
 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
 							 instance_mkdir,
 							 instance_rmdir);
 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
 		return;
+
+	mutex_lock(&event_mutex);
+	mutex_lock(&trace_types_lock);
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		if (!tr->name)
+			continue;
+		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
+			     "Failed to create instance directory\n"))
+			break;
+	}
+
+	mutex_unlock(&trace_types_lock);
+	mutex_unlock(&event_mutex);
 }
 
 static void
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0d3a405fe446..525434145eea 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1125,6 +1125,8 @@ extern int ftrace_is_dead(void);
 int ftrace_create_function_files(struct trace_array *tr,
 				 struct dentry *parent);
 void ftrace_destroy_function_files(struct trace_array *tr);
+int ftrace_allocate_ftrace_ops(struct trace_array *tr);
+void ftrace_free_ftrace_ops(struct trace_array *tr);
 void ftrace_init_global_array_ops(struct trace_array *tr);
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
 void ftrace_reset_array_ops(struct trace_array *tr);
@@ -1146,6 +1148,11 @@ ftrace_create_function_files(struct trace_array *tr,
 {
 	return 0;
 }
+static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr)
+{
+	return 0;
+}
+static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { }
 static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
 static inline __init void
 ftrace_init_global_array_ops(struct trace_array *tr) { }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index dd4dff71d89a..2c2126e1871d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -34,10 +34,14 @@ enum {
 	TRACE_FUNC_OPT_STACK	= 0x1,
 };
 
-static int allocate_ftrace_ops(struct trace_array *tr)
+int ftrace_allocate_ftrace_ops(struct trace_array *tr)
 {
 	struct ftrace_ops *ops;
 
+	/* The top level array uses the "global_ops" */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+		return 0;
+
 	ops = kzalloc(sizeof(*ops), GFP_KERNEL);
 	if (!ops)
 		return -ENOMEM;
@@ -48,15 +52,19 @@ static int allocate_ftrace_ops(struct trace_array *tr)
 
 	tr->ops = ops;
 	ops->private = tr;
+
 	return 0;
 }
 
+void ftrace_free_ftrace_ops(struct trace_array *tr)
+{
+	kfree(tr->ops);
+	tr->ops = NULL;
+}
 
 int ftrace_create_function_files(struct trace_array *tr,
 				 struct dentry *parent)
 {
-	int ret;
-
 	/*
 	 * The top level array uses the "global_ops", and the files are
 	 * created on boot up.
@@ -64,9 +72,8 @@ int ftrace_create_function_files(struct trace_array *tr,
 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
 		return 0;
 
-	ret = allocate_ftrace_ops(tr);
-	if (ret)
-		return ret;
+	if (!tr->ops)
+		return -EINVAL;
 
 	ftrace_create_filter_files(tr->ops, parent);
 
@@ -76,8 +83,7 @@ int ftrace_create_function_files(struct trace_array *tr,
 void ftrace_destroy_function_files(struct trace_array *tr)
 {
 	ftrace_destroy_filter_files(tr->ops);
-	kfree(tr->ops);
-	tr->ops = NULL;
+	ftrace_free_ftrace_ops(tr);
 }
 
 static int function_trace_init(struct trace_array *tr)

From ba0fbfbb21cd90d51e4f6668ee8397e810818028 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 21:39:17 +0900
Subject: [PATCH 25/58] tracing/boot, kprobe, synth: Initialize boot-time
 tracing earlier

Initialize boot-time tracing in core_initcall_sync instead of
fs_initcall, and initialize required tracers (kprobes and synth)
in core_initcall. This will allow the boot-time tracing to trace
__init code from the beginning of postcore_initcall stage.

Link: https://lkml.kernel.org/r/159974155727.478751.7486926132902849578.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_boot.c         |  7 +++++--
 kernel/trace/trace_events_synth.c | 23 ++++++++++++++++-------
 kernel/trace/trace_kprobe.c       |  6 +++---
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d52d441a17e8..754e3cf2df3a 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -340,5 +340,8 @@ static int __init trace_boot_init(void)
 
 	return 0;
 }
-
-fs_initcall(trace_boot_init);
+/*
+ * Start tracing at the end of core-initcall, so that it starts tracing
+ * from the beginning of postcore_initcall.
+ */
+core_initcall_sync(trace_boot_init);
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 7c765e80e974..a9cd7793f7ea 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1754,17 +1754,26 @@ static const struct file_operations synth_events_fops = {
 	.release        = seq_release,
 };
 
+/*
+ * Register dynevent at core_initcall. This allows kernel to setup kprobe
+ * events in postcore_initcall without tracefs.
+ */
+static __init int trace_events_synth_init_early(void)
+{
+	int err = 0;
+
+	err = dyn_event_register(&synth_event_ops);
+	if (err)
+		pr_warn("Could not register synth_event_ops\n");
+
+	return err;
+}
+core_initcall(trace_events_synth_init_early);
+
 static __init int trace_events_synth_init(void)
 {
 	struct dentry *entry = NULL;
 	int err = 0;
-
-	err = dyn_event_register(&synth_event_ops);
-	if (err) {
-		pr_warn("Could not register synth_event_ops\n");
-		return err;
-	}
-
 	err = tracing_init_dentry();
 	if (err)
 		goto err;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9d46415296eb..b911e9f6d9f5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1897,8 +1897,8 @@ static __init void setup_boot_kprobe_events(void)
 }
 
 /*
- * Register dynevent at subsys_initcall. This allows kernel to setup kprobe
- * events in fs_initcall without tracefs.
+ * Register dynevent at core_initcall. This allows kernel to setup kprobe
+ * events in postcore_initcall without tracefs.
  */
 static __init int init_kprobe_trace_early(void)
 {
@@ -1913,7 +1913,7 @@ static __init int init_kprobe_trace_early(void)
 
 	return 0;
 }
-subsys_initcall(init_kprobe_trace_early);
+core_initcall(init_kprobe_trace_early);
 
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)

From fd264ce96c382bc2e36eb1f49ac45c5980650244 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 10 Sep 2020 21:39:27 +0900
Subject: [PATCH 26/58] Documentation: tracing: Add the startup timing of
 boot-time tracing

Add the note about when to start the boot-time tracing.
This will be needed for the people who wants to trace
earlier boot sequence.

Link: https://lkml.kernel.org/r/159974156678.478751.10215894815285734481.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/boottime-trace.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/trace/boottime-trace.rst b/Documentation/trace/boottime-trace.rst
index ab3bfd67197c..89b64334929b 100644
--- a/Documentation/trace/boottime-trace.rst
+++ b/Documentation/trace/boottime-trace.rst
@@ -120,6 +120,20 @@ instance node, but those are also visible from other instances. So please
 take care for event name conflict.
 
 
+When to Start
+=============
+
+All boot-time tracing options starting with ``ftrace`` will be enabled at the
+end of core_initcall. This means you can trace the events from postcore_initcall.
+Most of the subsystems and architecture dependent drivers will be initialized
+after that (arch_initcall or subsys_initcall). Thus, you can trace those with
+boot-time tracing.
+If you want to trace events before core_initcall, you can use the options
+starting with ``kernel``. Some of them will be enabled eariler than the initcall
+processing (for example,. ``kernel.ftrace=function`` and ``kernel.trace_event``
+will start before the initcall.)
+
+
 Examples
 ========
 

From 720dee53ad8dfd528941fbbc264574601b04488a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 25 Sep 2020 01:40:08 +0900
Subject: [PATCH 27/58] tracing/boot: Initialize per-instance event list in
 early boot

Initialize per-instance event list in early boot time (before
initializing instance directory on tracefs). This fixes boot-time
tracing to correctly handle the boot-time per-instance settings.

Link: https://lkml.kernel.org/r/160096560826.182763.17110991546046128881.stgit@devnote2

Fixes: 4114fbfd02f1 ("tracing: Enable creating new instance early boot")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c        |  3 ++-
 kernel/trace/trace.h        |  1 +
 kernel/trace/trace_events.c | 30 ++++++++++++++++--------------
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6211a13b3327..3f2533adae72 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8700,7 +8700,8 @@ static struct trace_array *trace_array_create(const char *name)
 		ret = trace_array_create_dir(tr);
 		if (ret)
 			goto out_free_tr;
-	}
+	} else
+		__trace_early_add_events(tr);
 
 	list_add(&tr->list, &ftrace_trace_arrays);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 525434145eea..5b0e797cacdd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1658,6 +1658,7 @@ extern void trace_event_enable_tgid_record(bool enable);
 extern int event_trace_init(void);
 extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
 extern int event_trace_del_tracer(struct trace_array *tr);
+extern void __trace_early_add_events(struct trace_array *tr);
 
 extern struct trace_event_file *__find_event_file(struct trace_array *tr,
 						  const char *system,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 42c0e7df6e70..851ab37058dd 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3131,14 +3131,13 @@ static inline int register_event_cmds(void) { return 0; }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /*
- * The top level array has already had its trace_event_file
- * descriptors created in order to allow for early events to
- * be recorded. This function is called after the tracefs has been
- * initialized, and we now have to create the files associated
- * to the events.
+ * The top level array and trace arrays created by boot-time tracing
+ * have already had its trace_event_file descriptors created in order
+ * to allow for early events to be recorded.
+ * This function is called after the tracefs has been initialized,
+ * and we now have to create the files associated to the events.
  */
-static __init void
-__trace_early_add_event_dirs(struct trace_array *tr)
+static void __trace_early_add_event_dirs(struct trace_array *tr)
 {
 	struct trace_event_file *file;
 	int ret;
@@ -3153,13 +3152,12 @@ __trace_early_add_event_dirs(struct trace_array *tr)
 }
 
 /*
- * For early boot up, the top trace array requires to have
- * a list of events that can be enabled. This must be done before
- * the filesystem is set up in order to allow events to be traced
- * early.
+ * For early boot up, the top trace array and the trace arrays created
+ * by boot-time tracing require to have a list of events that can be
+ * enabled. This must be done before the filesystem is set up in order
+ * to allow events to be traced early.
  */
-static __init void
-__trace_early_add_events(struct trace_array *tr)
+void __trace_early_add_events(struct trace_array *tr)
 {
 	struct trace_event_call *call;
 	int ret;
@@ -3290,7 +3288,11 @@ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
 		goto out;
 
 	down_write(&trace_event_sem);
-	__trace_add_event_dirs(tr);
+	/* If tr already has the event list, it is initialized in early boot. */
+	if (unlikely(!list_empty(&tr->events)))
+		__trace_early_add_event_dirs(tr);
+	else
+		__trace_add_event_dirs(tr);
 	up_write(&trace_event_sem);
 
  out:

From afbe7973173a7ce0a68af8b33e44c967582297be Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 24 Sep 2020 12:30:20 -0400
Subject: [PATCH 28/58] tracepoints: Add helper to test if tracepoint is
 enabled in a header

As tracepoints are discouraged from being added in a header because it can
cause side effects if other tracepoints are in headers, as well as bloat the
kernel as the trace_<tracepoint>() function is not a small inline, the common
workaround is to add a function call that calls a wrapper function in a
C file that then calls the tracepoint. But as function calls add overhead,
this function should only be called when the tracepoint in question is
enabled. To get around this overhead, a static_branch can be used to only
have the tracepoint wrapper get called when the tracepoint is enabled.

Add a tracepoint_enabled(tp) macro that gets passed the name of the
tracepoint, and this becomes a static_branch that is enabled when the
tracepoint is enabled and is a nop when the tracepoint is disabled.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/tracepoints.rst | 27 +++++++++++++++++++++++
 include/linux/tracepoint-defs.h     | 34 +++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/Documentation/trace/tracepoints.rst b/Documentation/trace/tracepoints.rst
index 6e3ce3bf3593..0cb8d9ca3d60 100644
--- a/Documentation/trace/tracepoints.rst
+++ b/Documentation/trace/tracepoints.rst
@@ -146,3 +146,30 @@ with jump labels and avoid conditional branches.
       define tracepoints. Check http://lwn.net/Articles/379903,
       http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362
       for a series of articles with more details.
+
+If you require calling a tracepoint from a header file, it is not
+recommended to call one directly or to use the trace_<tracepoint>_enabled()
+function call, as tracepoints in header files can have side effects if a
+header is included from a file that has CREATE_TRACE_POINTS set, as
+well as the trace_<tracepoint>() is not that small of an inline
+and can bloat the kernel if used by other inlined functions. Instead,
+include tracepoint-defs.h and use tracepoint_enabled().
+
+In a C file::
+
+	void do_trace_foo_bar_wrapper(args)
+	{
+		trace_foo_bar(args);
+	}
+
+In the header file::
+
+	DECLARE_TRACEPOINT(foo_bar);
+
+	static inline void some_inline_function()
+	{
+		[..]
+		if (tracepoint_enabled(foo_bar))
+			do_trace_foo_bar_wrapper(args);
+		[..]
+	}
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index b29950a19205..60625973faaf 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -48,4 +48,38 @@ struct bpf_raw_event_map {
 	u32			writable_size;
 } __aligned(32);
 
+/*
+ * If a tracepoint needs to be called from a header file, it is not
+ * recommended to call it directly, as tracepoints in header files
+ * may cause side-effects and bloat the kernel. Instead, use
+ * tracepoint_enabled() to test if the tracepoint is enabled, then if
+ * it is, call a wrapper function defined in a C file that will then
+ * call the tracepoint.
+ *
+ * For "trace_foo_bar()", you would need to create a wrapper function
+ * in a C file to call trace_foo_bar():
+ *   void do_trace_foo_bar(args) { trace_foo_bar(args); }
+ * Then in the header file, declare the tracepoint:
+ *   DECLARE_TRACEPOINT(foo_bar);
+ * And call your wrapper:
+ *   static inline void some_inlined_function() {
+ *            [..]
+ *            if (tracepoint_enabled(foo_bar))
+ *                    do_trace_foo_bar(args);
+ *            [..]
+ *   }
+ *
+ * Note: tracepoint_enabled(foo_bar) is equivalent to trace_foo_bar_enabled()
+ *   but is safe to have in headers, where trace_foo_bar_enabled() is not.
+ */
+#define DECLARE_TRACEPOINT(tp) \
+	extern struct tracepoint __tracepoint_##tp
+
+#ifdef CONFIG_TRACEPOINTS
+# define tracepoint_enabled(tp) \
+	static_key_false(&(__tracepoint_##tp).key)
+#else
+# define tracepoint_enabled(tracepoint) false
+#endif
+
 #endif

From c65fc2276f0f022f5ad4a84658add2b28cff7227 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 24 Sep 2020 12:43:46 -0400
Subject: [PATCH 29/58] mm/page_ref: Convert the open coded tracepoint enabled
 to the new helper

As more use cases of checking if a tracepoint is enabled in a header are
coming to fruition, a helper macro, tracepoint_enabled(), has been added to
check if a tracepoint is enabled or not, and can be used with minimal header
requirements (avoid "include hell"). Convert the page_ref logic over to the
new helper macro.

Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/page_ref.h | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index d27701199a4d..f3318f34fc54 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -7,13 +7,13 @@
 #include <linux/page-flags.h>
 #include <linux/tracepoint-defs.h>
 
-extern struct tracepoint __tracepoint_page_ref_set;
-extern struct tracepoint __tracepoint_page_ref_mod;
-extern struct tracepoint __tracepoint_page_ref_mod_and_test;
-extern struct tracepoint __tracepoint_page_ref_mod_and_return;
-extern struct tracepoint __tracepoint_page_ref_mod_unless;
-extern struct tracepoint __tracepoint_page_ref_freeze;
-extern struct tracepoint __tracepoint_page_ref_unfreeze;
+DECLARE_TRACEPOINT(page_ref_set);
+DECLARE_TRACEPOINT(page_ref_mod);
+DECLARE_TRACEPOINT(page_ref_mod_and_test);
+DECLARE_TRACEPOINT(page_ref_mod_and_return);
+DECLARE_TRACEPOINT(page_ref_mod_unless);
+DECLARE_TRACEPOINT(page_ref_freeze);
+DECLARE_TRACEPOINT(page_ref_unfreeze);
 
 #ifdef CONFIG_DEBUG_PAGE_REF
 
@@ -24,7 +24,7 @@ extern struct tracepoint __tracepoint_page_ref_unfreeze;
  *
  * See trace_##name##_enabled(void) in include/linux/tracepoint.h
  */
-#define page_ref_tracepoint_active(t) static_key_false(&(t).key)
+#define page_ref_tracepoint_active(t) tracepoint_enabled(t)
 
 extern void __page_ref_set(struct page *page, int v);
 extern void __page_ref_mod(struct page *page, int v);
@@ -75,7 +75,7 @@ static inline int page_count(struct page *page)
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_refcount, v);
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_set))
+	if (page_ref_tracepoint_active(page_ref_set))
 		__page_ref_set(page, v);
 }
 
@@ -91,14 +91,14 @@ static inline void init_page_count(struct page *page)
 static inline void page_ref_add(struct page *page, int nr)
 {
 	atomic_add(nr, &page->_refcount);
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
+	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, nr);
 }
 
 static inline void page_ref_sub(struct page *page, int nr)
 {
 	atomic_sub(nr, &page->_refcount);
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
+	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, -nr);
 }
 
@@ -106,7 +106,7 @@ static inline int page_ref_sub_return(struct page *page, int nr)
 {
 	int ret = atomic_sub_return(nr, &page->_refcount);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
+	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, -nr, ret);
 	return ret;
 }
@@ -114,14 +114,14 @@ static inline int page_ref_sub_return(struct page *page, int nr)
 static inline void page_ref_inc(struct page *page)
 {
 	atomic_inc(&page->_refcount);
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
+	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, 1);
 }
 
 static inline void page_ref_dec(struct page *page)
 {
 	atomic_dec(&page->_refcount);
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
+	if (page_ref_tracepoint_active(page_ref_mod))
 		__page_ref_mod(page, -1);
 }
 
@@ -129,7 +129,7 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
 {
 	int ret = atomic_sub_and_test(nr, &page->_refcount);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test))
+	if (page_ref_tracepoint_active(page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -nr, ret);
 	return ret;
 }
@@ -138,7 +138,7 @@ static inline int page_ref_inc_return(struct page *page)
 {
 	int ret = atomic_inc_return(&page->_refcount);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
+	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, 1, ret);
 	return ret;
 }
@@ -147,7 +147,7 @@ static inline int page_ref_dec_and_test(struct page *page)
 {
 	int ret = atomic_dec_and_test(&page->_refcount);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test))
+	if (page_ref_tracepoint_active(page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -1, ret);
 	return ret;
 }
@@ -156,7 +156,7 @@ static inline int page_ref_dec_return(struct page *page)
 {
 	int ret = atomic_dec_return(&page->_refcount);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
+	if (page_ref_tracepoint_active(page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, -1, ret);
 	return ret;
 }
@@ -165,7 +165,7 @@ static inline int page_ref_add_unless(struct page *page, int nr, int u)
 {
 	int ret = atomic_add_unless(&page->_refcount, nr, u);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless))
+	if (page_ref_tracepoint_active(page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
 	return ret;
 }
@@ -174,7 +174,7 @@ static inline int page_ref_freeze(struct page *page, int count)
 {
 	int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
 
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze))
+	if (page_ref_tracepoint_active(page_ref_freeze))
 		__page_ref_freeze(page, count, ret);
 	return ret;
 }
@@ -185,7 +185,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 	VM_BUG_ON(count == 0);
 
 	atomic_set_release(&page->_refcount, count);
-	if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
+	if (page_ref_tracepoint_active(page_ref_unfreeze))
 		__page_ref_unfreeze(page, count);
 }
 

From fdb46faeab2f3fa2b43a55059b33b8f98b2e1442 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 25 Sep 2020 15:31:44 -0400
Subject: [PATCH 30/58] x86: Use tracepoint_enabled() for msr tracepoints
 instead of open coding it

7f47d8cc039f ("x86, tracing, perf: Add trace point for MSR accesses") added
tracing of msr read and write, but because of complexity in having
tracepoints in headers, and even more so for a core header like msr.h, not
to mention the bloat a tracepoint adds to inline functions, a helper
function is needed to be called from the header.

Use the new tracepoint_enabled() macro in tracepoint-defs.h to test if the
tracepoint is active before calling the helper function, instead of open
coding the same logic, which requires knowing the internals of a tracepoint.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/x86/include/asm/msr.h | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 86f20d520a07..0b4920a7238e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -60,22 +60,20 @@ struct saved_msrs {
 #define EAX_EDX_RET(val, low, high)	"=A" (val)
 #endif
 
-#ifdef CONFIG_TRACEPOINTS
 /*
  * Be very careful with includes. This header is prone to include loops.
  */
 #include <asm/atomic.h>
 #include <linux/tracepoint-defs.h>
 
-extern struct tracepoint __tracepoint_read_msr;
-extern struct tracepoint __tracepoint_write_msr;
-extern struct tracepoint __tracepoint_rdpmc;
-#define msr_tracepoint_active(t) static_key_false(&(t).key)
+#ifdef CONFIG_TRACEPOINTS
+DECLARE_TRACEPOINT(read_msr);
+DECLARE_TRACEPOINT(write_msr);
+DECLARE_TRACEPOINT(rdpmc);
 extern void do_trace_write_msr(unsigned int msr, u64 val, int failed);
 extern void do_trace_read_msr(unsigned int msr, u64 val, int failed);
 extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed);
 #else
-#define msr_tracepoint_active(t) false
 static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {}
 static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
 static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
@@ -128,7 +126,7 @@ static inline unsigned long long native_read_msr(unsigned int msr)
 
 	val = __rdmsr(msr);
 
-	if (msr_tracepoint_active(__tracepoint_read_msr))
+	if (tracepoint_enabled(read_msr))
 		do_trace_read_msr(msr, val, 0);
 
 	return val;
@@ -150,7 +148,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 		     _ASM_EXTABLE(2b, 3b)
 		     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
 		     : "c" (msr), [fault] "i" (-EIO));
-	if (msr_tracepoint_active(__tracepoint_read_msr))
+	if (tracepoint_enabled(read_msr))
 		do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
 	return EAX_EDX_VAL(val, low, high);
 }
@@ -161,7 +159,7 @@ native_write_msr(unsigned int msr, u32 low, u32 high)
 {
 	__wrmsr(msr, low, high);
 
-	if (msr_tracepoint_active(__tracepoint_write_msr))
+	if (tracepoint_enabled(write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
 
@@ -181,7 +179,7 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high)
 		     : "c" (msr), "0" (low), "d" (high),
 		       [fault] "i" (-EIO)
 		     : "memory");
-	if (msr_tracepoint_active(__tracepoint_write_msr))
+	if (tracepoint_enabled(write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), err);
 	return err;
 }
@@ -248,7 +246,7 @@ static inline unsigned long long native_read_pmc(int counter)
 	DECLARE_ARGS(val, low, high);
 
 	asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
-	if (msr_tracepoint_active(__tracepoint_rdpmc))
+	if (tracepoint_enabled(rdpmc))
 		do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
 	return EAX_EDX_VAL(val, low, high);
 }

From fdda88d31addb19a35ac7962e752e55aaeb5c20a Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Fri, 2 Oct 2020 22:31:26 +0800
Subject: [PATCH 31/58] ftrace: Fix some typos in comment

s/coorditate/coordinate/
s/emty/empty/
s/preeptive/preemptive/
s/succes/success/
s/carefule/careful/

Link: https://lkml.kernel.org/r/20201002143126.2890-1-hqjagain@gmail.com

Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 84f32dbc7be8..123d520b9261 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -230,7 +230,7 @@ static void update_ftrace_function(void)
 	/*
 	 * For static tracing, we need to be a bit more careful.
 	 * The function change takes affect immediately. Thus,
-	 * we need to coorditate the setting of the function_trace_ops
+	 * we need to coordinate the setting of the function_trace_ops
 	 * with the setting of the ftrace_trace_function.
 	 *
 	 * Set the function to the list ops, which will call the
@@ -1451,7 +1451,7 @@ static bool hash_contains_ip(unsigned long ip,
 {
 	/*
 	 * The function record is a match if it exists in the filter
-	 * hash and not in the notrace hash. Note, an emty hash is
+	 * hash and not in the notrace hash. Note, an empty hash is
 	 * considered a match for the filter hash, but an empty
 	 * notrace hash is considered not in the notrace hash.
 	 */
@@ -2976,7 +2976,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
 		synchronize_rcu_tasks_rude();
 
 		/*
-		 * When the kernel is preeptive, tasks can be preempted
+		 * When the kernel is preemptive, tasks can be preempted
 		 * while on a ftrace trampoline. Just scheduling a task on
 		 * a CPU is not good enough to flush them. Calling
 		 * synchornize_rcu_tasks() will wait for those tasks to
@@ -4368,7 +4368,7 @@ void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper,
  * @ip: The instruction pointer address to map @data to
  * @data: The data to map to @ip
  *
- * Returns 0 on succes otherwise an error.
+ * Returns 0 on success otherwise an error.
  */
 int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
 			      unsigned long ip, void *data)
@@ -4536,7 +4536,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 
 	/*
 	 * Note, there's a small window here that the func_hash->filter_hash
-	 * may be NULL or empty. Need to be carefule when reading the loop.
+	 * may be NULL or empty. Need to be careful when reading the loop.
 	 */
 	mutex_lock(&probe->ops.func_hash->regex_lock);
 

From 4a4a56b4e76bbe3211c4f93d99c0c1543f5f3230 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Sun, 4 Oct 2020 17:14:03 -0500
Subject: [PATCH 32/58] tracing: Change STR_VAR_MAX_LEN

32 is too small for this value, and anyway it makes more sense to use
MAX_FILTER_STR_VAL, as this is also the value used for variable-length
__strings.

Link: https://lkml.kernel.org/r/6adfd1668ac1fd8670bd58206944a762061a5559.1601848695.git.zanussi@kernel.org

Tested-by: Axel Rasmussen <axelrasmussen@google.com>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 2 ++
 kernel/trace/trace_synth.h       | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1b2ef6490229..3b22e2122d1a 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1398,6 +1398,8 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 
 	n_str = hist_data->n_field_var_str + hist_data->n_save_var_str;
 
+	BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1));
+
 	size = STR_VAR_LEN_MAX;
 
 	for (i = 0; i < n_str; i++) {
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index ac35c45207c4..5166705d1556 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -7,7 +7,7 @@
 #define SYNTH_SYSTEM		"synthetic"
 #define SYNTH_FIELDS_MAX	32
 
-#define STR_VAR_LEN_MAX		32 /* must be multiple of sizeof(u64) */
+#define STR_VAR_LEN_MAX		MAX_FILTER_STR_VAL /* must be multiple of sizeof(u64) */
 
 struct synth_field {
 	char *type;

From 8fbeb52a598c7ab5aa603d6bb083b8a8d16d607a Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Sun, 4 Oct 2020 17:14:04 -0500
Subject: [PATCH 33/58] tracing: Fix parse_synth_field() error handling

synth_field_size() returns either a positive size or an error (zero or
a negative value). However, the existing code assumes the only error
value is 0. It doesn't handle negative error codes, as it assigns
directly to field->size (a size_t; unsigned), thereby interpreting the
error code as a valid size instead.

Do the test before assignment to field->size.

[ axelrasmussen@google.com: changelog addition, first paragraph above ]

Link: https://lkml.kernel.org/r/9b6946d9776b2eeb43227678158196de1c3c6e1d.1601848695.git.zanussi@kernel.org

Fixes: 4b147936fa50 (tracing: Add support for 'synthetic' events)
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Axel Rasmussen <axelrasmussen@google.com>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index a9cd7793f7ea..fa8a99828f41 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -465,6 +465,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 	struct synth_field *field;
 	const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
 	int len, ret = 0;
+	ssize_t size;
 
 	if (field_type[0] == ';')
 		field_type++;
@@ -520,11 +521,12 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 			field->type[len - 1] = '\0';
 	}
 
-	field->size = synth_field_size(field->type);
-	if (!field->size) {
+	size = synth_field_size(field->type);
+	if (size <= 0) {
 		ret = -EINVAL;
 		goto free;
 	}
+	field->size = size;
 
 	if (synth_field_is_string(field->type))
 		field->is_string = true;

From 63a1e5de3006f4ad713e4d72bcb404d0301e853d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Sun, 4 Oct 2020 17:14:05 -0500
Subject: [PATCH 34/58] tracing: Save normal string variables

String variables created as field variables and save variables are
already handled properly by having their values copied when set.  The
same isn't done for normal variables, but needs to be - simply saving
a pointer to a string contained in an old event isn't sufficient,
since that event's data may quickly become overwritten and therefore a
string pointer to it could yield garbage.

This change uses the same mechanism as field variables and simply
appends the new strings to the existing per-element field_var_str[]
array allocated for that purpose.

Link: https://lkml.kernel.org/r/1c1a03798b02e67307412a0c719d1bfb69b13007.1601848695.git.zanussi@kernel.org

Fixes: 02205a6752f2 (tracing: Add support for 'field variables')
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 34 ++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 3b22e2122d1a..812bc5f94b5c 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -147,6 +147,8 @@ struct hist_field {
 	 */
 	unsigned int			var_ref_idx;
 	bool                            read_once;
+
+	unsigned int			var_str_idx;
 };
 
 static u64 hist_field_none(struct hist_field *field,
@@ -349,6 +351,7 @@ struct hist_trigger_data {
 	unsigned int			n_keys;
 	unsigned int			n_fields;
 	unsigned int			n_vars;
+	unsigned int			n_var_str;
 	unsigned int			key_size;
 	struct tracing_map_sort_key	sort_keys[TRACING_MAP_SORT_KEYS_MAX];
 	unsigned int			n_sort_keys;
@@ -1396,7 +1399,12 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 		}
 	}
 
-	n_str = hist_data->n_field_var_str + hist_data->n_save_var_str;
+	n_str = hist_data->n_field_var_str + hist_data->n_save_var_str +
+		hist_data->n_var_str;
+	if (n_str > SYNTH_FIELDS_MAX) {
+		hist_elt_data_free(elt_data);
+		return -EINVAL;
+	}
 
 	BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1));
 
@@ -3653,6 +3661,7 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 {
 	struct trace_array *tr = hist_data->event_file->tr;
 	unsigned long flags = 0;
+	int ret;
 
 	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
 		return -EINVAL;
@@ -3667,7 +3676,12 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 	if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
 		return -EINVAL;
 
-	return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+	ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+
+	if (hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
+		hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++;
+
+	return ret;
 }
 
 static int create_val_fields(struct hist_trigger_data *hist_data,
@@ -4394,6 +4408,22 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
 		hist_val = hist_field->fn(hist_field, elt, rbe, rec);
 		if (hist_field->flags & HIST_FIELD_FL_VAR) {
 			var_idx = hist_field->var.idx;
+
+			if (hist_field->flags & HIST_FIELD_FL_STRING) {
+				unsigned int str_start, var_str_idx, idx;
+				char *str, *val_str;
+
+				str_start = hist_data->n_field_var_str +
+					hist_data->n_save_var_str;
+				var_str_idx = hist_field->var_str_idx;
+				idx = str_start + var_str_idx;
+
+				str = elt_data->field_var_str[idx];
+				val_str = (char *)(uintptr_t)hist_val;
+				strscpy(str, val_str, STR_VAR_LEN_MAX);
+
+				hist_val = (u64)(uintptr_t)str;
+			}
 			tracing_map_set_var(elt, var_idx, hist_val);
 			continue;
 		}

From bd82631d7ccdc894af2738e47abcba2cb6e7dea9 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Sun, 4 Oct 2020 17:14:06 -0500
Subject: [PATCH 35/58] tracing: Add support for dynamic strings to synthetic
 events

Currently, sythetic events only support static string fields such as:

  # echo 'test_latency u64 lat; char somename[32]' > /sys/kernel/debug/tracing/synthetic_events

Which is fine, but wastes a lot of space in the event.

It also prevents the most commonly-defined strings in the existing
trace events e.g. those defined using __string(), from being passed to
synthetic events via the trace() action.

With this change, synthetic events with dynamic fields can be defined:

  # echo 'test_latency u64 lat; char somename[]' > /sys/kernel/debug/tracing/synthetic_events

And the trace() action can be used to generate events using either
dynamic or static strings:

  # echo 'hist:keys=name:lat=common_timestamp.usecs-$ts0:onmatch(sys.event).test_latency($lat,name)' > /sys/kernel/debug/tracing/events

The synthetic event dynamic strings are implemented in the same way as
the existing __data_loc strings and appear as such in the format file.

[ <rostedt@goodmis.org>: added __set_synth_event_print_fmt() changes:

  I added the following to make it work with trace-cmd. Dynamic strings
  must have __get_str() for events in the print_fmt otherwise it can't be
  parsed correctly. ]

Link: https://lore.kernel.org/r/cover.1601588066.git.zanussi@kernel.org
Link: https://lkml.kernel.org/r/3ed35b6d0e390f5b94cb4a9ba1cc18f5982ab277.1601848695.git.zanussi@kernel.org

Tested-by: Axel Rasmussen <axelrasmussen@google.com>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/events.rst      |  15 +-
 Documentation/trace/histogram.rst   |  18 ++
 kernel/trace/synth_event_gen_test.c |  18 +-
 kernel/trace/trace_events_hist.c    |   9 +
 kernel/trace/trace_events_synth.c   | 248 ++++++++++++++++++++++++----
 kernel/trace/trace_synth.h          |   4 +
 6 files changed, 272 insertions(+), 40 deletions(-)

diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index f792b1959a33..2a5aa48eff6c 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -589,8 +589,19 @@ name::
         { .type = "int",                .name = "my_int_field" },
   };
 
-See synth_field_size() for available types. If field_name contains [n]
-the field is considered to be an array.
+See synth_field_size() for available types.
+
+If field_name contains [n], the field is considered to be a static array.
+
+If field_names contains[] (no subscript), the field is considered to
+be a dynamic array, which will only take as much space in the event as
+is required to hold the array.
+
+Because space for an event is reserved before assigning field values
+to the event, using dynamic arrays implies that the piecewise
+in-kernel API described below can't be used with dynamic arrays.  The
+other non-piecewise in-kernel APIs can, however, be used with dynamic
+arrays.
 
 If the event is created from within a module, a pointer to the module
 must be passed to synth_event_create().  This will ensure that the
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index 8408670d0328..b573604deabd 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -1776,6 +1776,24 @@ consisting of the name of the new event along with one or more
 variables and their types, which can be any valid field type,
 separated by semicolons, to the tracing/synthetic_events file.
 
+See synth_field_size() for available types.
+
+If field_name contains [n], the field is considered to be a static array.
+
+If field_names contains[] (no subscript), the field is considered to
+be a dynamic array, which will only take as much space in the event as
+is required to hold the array.
+
+A string field can be specified using either the static notation:
+
+  char name[32];
+
+Or the dynamic:
+
+  char name[];
+
+The size limit for either is 256.
+
 For instance, the following creates a new event named 'wakeup_latency'
 with 3 fields: lat, pid, and prio.  Each of those fields is simply a
 variable reference to a variable on another event::
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 7d56d621ffea..edd912cd14aa 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -242,9 +242,11 @@ static struct synth_field_desc create_synth_test_fields[] = {
 	{ .type = "pid_t",		.name = "next_pid_field" },
 	{ .type = "char[16]",		.name = "next_comm_field" },
 	{ .type = "u64",		.name = "ts_ns" },
+	{ .type = "char[]",		.name = "dynstring_field_1" },
 	{ .type = "u64",		.name = "ts_ms" },
 	{ .type = "unsigned int",	.name = "cpu" },
 	{ .type = "char[64]",		.name = "my_string_field" },
+	{ .type = "char[]",		.name = "dynstring_field_2" },
 	{ .type = "int",		.name = "my_int_field" },
 };
 
@@ -254,7 +256,7 @@ static struct synth_field_desc create_synth_test_fields[] = {
  */
 static int __init test_create_synth_event(void)
 {
-	u64 vals[7];
+	u64 vals[9];
 	int ret;
 
 	/* Create the create_synth_test event with the fields above */
@@ -292,10 +294,12 @@ static int __init test_create_synth_event(void)
 	vals[0] = 777;			/* next_pid_field */
 	vals[1] = (u64)(long)"tiddlywinks";	/* next_comm_field */
 	vals[2] = 1000000;		/* ts_ns */
-	vals[3] = 1000;			/* ts_ms */
-	vals[4] = raw_smp_processor_id(); /* cpu */
-	vals[5] = (u64)(long)"thneed";	/* my_string_field */
-	vals[6] = 398;			/* my_int_field */
+	vals[3] = (u64)(long)"xrayspecs";	/* dynstring_field_1 */
+	vals[4] = 1000;			/* ts_ms */
+	vals[5] = raw_smp_processor_id(); /* cpu */
+	vals[6] = (u64)(long)"thneed";	/* my_string_field */
+	vals[7] = (u64)(long)"kerplunk";	/* dynstring_field_2 */
+	vals[8] = 398;			/* my_int_field */
 
 	/* Now generate a create_synth_test event */
 	ret = synth_event_trace_array(create_synth_test, vals, ARRAY_SIZE(vals));
@@ -422,13 +426,15 @@ static int __init test_trace_synth_event(void)
 	int ret;
 
 	/* Trace some bogus values just for testing */
-	ret = synth_event_trace(create_synth_test, 7,	/* number of values */
+	ret = synth_event_trace(create_synth_test, 9,	/* number of values */
 				(u64)444,		/* next_pid_field */
 				(u64)(long)"clackers",	/* next_comm_field */
 				(u64)1000000,		/* ts_ns */
+				(u64)(long)"viewmaster",/* dynstring_field_1 */
 				(u64)1000,		/* ts_ms */
 				(u64)raw_smp_processor_id(), /* cpu */
 				(u64)(long)"Thneed",	/* my_string_field */
+				(u64)(long)"yoyos",	/* dynstring_field_2 */
 				(u64)999);		/* my_int_field */
 	return ret;
 }
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 812bc5f94b5c..c74a7d157306 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3289,6 +3289,15 @@ static int check_synth_field(struct synth_event *event,
 
 	field = event->fields[field_pos];
 
+	/*
+	 * A dynamic string synth field can accept static or
+	 * dynamic. A static string synth field can only accept a
+	 * same-sized static string, which is checked for later.
+	 */
+	if (strstr(hist_field->type, "char[") && field->is_string
+	    && field->is_dynamic)
+		return 0;
+
 	if (strcmp(field->type, hist_field->type) != 0) {
 		if (field->size != hist_field->size ||
 		    field->is_signed != hist_field->is_signed)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index fa8a99828f41..24bc6d61aa40 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -88,7 +88,7 @@ static int synth_event_define_fields(struct trace_event_call *call)
 
 		event->fields[i]->offset = n_u64;
 
-		if (event->fields[i]->is_string) {
+		if (event->fields[i]->is_string && !event->fields[i]->is_dynamic) {
 			offset += STR_VAR_LEN_MAX;
 			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
 		} else {
@@ -139,6 +139,9 @@ static int synth_field_string_size(char *type)
 	if (len > 3)
 		return -EINVAL;
 
+	if (len == 0)
+		return 0; /* variable-length string */
+
 	strncpy(buf, start, len);
 	buf[len] = '\0';
 
@@ -290,10 +293,25 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 
 		/* parameter values */
 		if (se->fields[i]->is_string) {
-			trace_seq_printf(s, print_fmt, se->fields[i]->name,
-					 (char *)&entry->fields[n_u64],
-					 i == se->n_fields - 1 ? "" : " ");
-			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+			if (se->fields[i]->is_dynamic) {
+				u32 offset, data_offset;
+				char *str_field;
+
+				offset = (u32)entry->fields[n_u64];
+				data_offset = offset & 0xffff;
+
+				str_field = (char *)entry + data_offset;
+
+				trace_seq_printf(s, print_fmt, se->fields[i]->name,
+						 str_field,
+						 i == se->n_fields - 1 ? "" : " ");
+				n_u64++;
+			} else {
+				trace_seq_printf(s, print_fmt, se->fields[i]->name,
+						 (char *)&entry->fields[n_u64],
+						 i == se->n_fields - 1 ? "" : " ");
+				n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+			}
 		} else {
 			struct trace_print_flags __flags[] = {
 			    __def_gfpflag_names, {-1, NULL} };
@@ -325,16 +343,52 @@ static struct trace_event_functions synth_event_funcs = {
 	.trace		= print_synth_event
 };
 
+static unsigned int trace_string(struct synth_trace_event *entry,
+				 struct synth_event *event,
+				 char *str_val,
+				 bool is_dynamic,
+				 unsigned int data_size,
+				 unsigned int *n_u64)
+{
+	unsigned int len = 0;
+	char *str_field;
+
+	if (is_dynamic) {
+		u32 data_offset;
+
+		data_offset = offsetof(typeof(*entry), fields);
+		data_offset += event->n_u64 * sizeof(u64);
+		data_offset += data_size;
+
+		str_field = (char *)entry + data_offset;
+
+		len = strlen(str_val) + 1;
+		strscpy(str_field, str_val, len);
+
+		data_offset |= len << 16;
+		*(u32 *)&entry->fields[*n_u64] = data_offset;
+
+		(*n_u64)++;
+	} else {
+		str_field = (char *)&entry->fields[*n_u64];
+
+		strscpy(str_field, str_val, STR_VAR_LEN_MAX);
+		(*n_u64) += STR_VAR_LEN_MAX / sizeof(u64);
+	}
+
+	return len;
+}
+
 static notrace void trace_event_raw_event_synth(void *__data,
 						u64 *var_ref_vals,
 						unsigned int *var_ref_idx)
 {
+	unsigned int i, n_u64, val_idx, len, data_size = 0;
 	struct trace_event_file *trace_file = __data;
 	struct synth_trace_event *entry;
 	struct trace_event_buffer fbuffer;
 	struct trace_buffer *buffer;
 	struct synth_event *event;
-	unsigned int i, n_u64, val_idx;
 	int fields_size = 0;
 
 	event = trace_file->event_call->data;
@@ -344,6 +398,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
 
 	fields_size = event->n_u64 * sizeof(u64);
 
+	for (i = 0; i < event->n_dynamic_fields; i++) {
+		unsigned int field_pos = event->dynamic_fields[i]->field_pos;
+		char *str_val;
+
+		val_idx = var_ref_idx[field_pos];
+		str_val = (char *)(long)var_ref_vals[val_idx];
+
+		len = strlen(str_val) + 1;
+
+		fields_size += len;
+	}
+
 	/*
 	 * Avoid ring buffer recursion detection, as this event
 	 * is being performed within another event.
@@ -360,10 +426,11 @@ static notrace void trace_event_raw_event_synth(void *__data,
 		val_idx = var_ref_idx[i];
 		if (event->fields[i]->is_string) {
 			char *str_val = (char *)(long)var_ref_vals[val_idx];
-			char *str_field = (char *)&entry->fields[n_u64];
 
-			strscpy(str_field, str_val, STR_VAR_LEN_MAX);
-			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+			len = trace_string(entry, event, str_val,
+					   event->fields[i]->is_dynamic,
+					   data_size, &n_u64);
+			data_size += len; /* only dynamic string increments */
 		} else {
 			struct synth_field *field = event->fields[i];
 			u64 val = var_ref_vals[val_idx];
@@ -422,8 +489,13 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
 
 	for (i = 0; i < event->n_fields; i++) {
-		pos += snprintf(buf + pos, LEN_OR_ZERO,
-				", REC->%s", event->fields[i]->name);
+		if (event->fields[i]->is_dynamic &&
+		    event->fields[i]->is_dynamic)
+			pos += snprintf(buf + pos, LEN_OR_ZERO,
+				", __get_str(%s)", event->fields[i]->name);
+		else
+			pos += snprintf(buf + pos, LEN_OR_ZERO,
+					", REC->%s", event->fields[i]->name);
 	}
 
 #undef LEN_OR_ZERO
@@ -522,9 +594,30 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 	}
 
 	size = synth_field_size(field->type);
-	if (size <= 0) {
+	if (size < 0) {
 		ret = -EINVAL;
 		goto free;
+	} else if (size == 0) {
+		if (synth_field_is_string(field->type)) {
+			char *type;
+
+			type = kzalloc(sizeof("__data_loc ") + strlen(field->type) + 1, GFP_KERNEL);
+			if (!type) {
+				ret = -ENOMEM;
+				goto free;
+			}
+
+			strcat(type, "__data_loc ");
+			strcat(type, field->type);
+			kfree(field->type);
+			field->type = type;
+
+			field->is_dynamic = true;
+			size = sizeof(u64);
+		} else {
+			ret = -EINVAL;
+			goto free;
+		}
 	}
 	field->size = size;
 
@@ -532,7 +625,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 		field->is_string = true;
 
 	field->is_signed = synth_field_signed(field->type);
-
  out:
 	return field;
  free:
@@ -663,6 +755,7 @@ static void free_synth_event(struct synth_event *event)
 		free_synth_field(event->fields[i]);
 
 	kfree(event->fields);
+	kfree(event->dynamic_fields);
 	kfree(event->name);
 	kfree(event->class.system);
 	free_synth_tracepoint(event->tp);
@@ -673,8 +766,8 @@ static void free_synth_event(struct synth_event *event)
 static struct synth_event *alloc_synth_event(const char *name, int n_fields,
 					     struct synth_field **fields)
 {
+	unsigned int i, j, n_dynamic_fields = 0;
 	struct synth_event *event;
-	unsigned int i;
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (!event) {
@@ -696,11 +789,33 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields,
 		goto out;
 	}
 
+	for (i = 0; i < n_fields; i++)
+		if (fields[i]->is_dynamic)
+			n_dynamic_fields++;
+
+	if (n_dynamic_fields) {
+		event->dynamic_fields = kcalloc(n_dynamic_fields,
+						sizeof(*event->dynamic_fields),
+						GFP_KERNEL);
+		if (!event->dynamic_fields) {
+			free_synth_event(event);
+			event = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+	}
+
 	dyn_event_init(&event->devent, &synth_event_ops);
 
-	for (i = 0; i < n_fields; i++)
+	for (i = 0, j = 0; i < n_fields; i++) {
 		event->fields[i] = fields[i];
 
+		if (fields[i]->is_dynamic) {
+			event->dynamic_fields[j] = fields[i];
+			event->dynamic_fields[j]->field_pos = i;
+			event->dynamic_fields[j++] = fields[i];
+			event->n_dynamic_fields++;
+		}
+	}
 	event->n_fields = n_fields;
  out:
 	return event;
@@ -712,6 +827,10 @@ static int synth_event_check_arg_fn(void *data)
 	int size;
 
 	size = synth_field_size((char *)arg_pair->lhs);
+	if (size == 0) {
+		if (strstr((char *)arg_pair->lhs, "["))
+			return 0;
+	}
 
 	return size ? 0 : -EINVAL;
 }
@@ -1200,10 +1319,9 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen)
 EXPORT_SYMBOL_GPL(synth_event_cmd_init);
 
 static inline int
-__synth_event_trace_start(struct trace_event_file *file,
-			  struct synth_event_trace_state *trace_state)
+__synth_event_trace_init(struct trace_event_file *file,
+			 struct synth_event_trace_state *trace_state)
 {
-	int entry_size, fields_size = 0;
 	int ret = 0;
 
 	memset(trace_state, '\0', sizeof(*trace_state));
@@ -1225,8 +1343,20 @@ __synth_event_trace_start(struct trace_event_file *file,
 	}
 
 	trace_state->event = file->event_call->data;
+out:
+	return ret;
+}
+
+static inline int
+__synth_event_trace_start(struct trace_event_file *file,
+			  struct synth_event_trace_state *trace_state,
+			  int dynamic_fields_size)
+{
+	int entry_size, fields_size = 0;
+	int ret = 0;
 
 	fields_size = trace_state->event->n_u64 * sizeof(u64);
+	fields_size += dynamic_fields_size;
 
 	/*
 	 * Avoid ring buffer recursion detection, as this event
@@ -1243,7 +1373,7 @@ __synth_event_trace_start(struct trace_event_file *file,
 		ring_buffer_nest_end(trace_state->buffer);
 		ret = -EINVAL;
 	}
-out:
+
 	return ret;
 }
 
@@ -1276,23 +1406,46 @@ __synth_event_trace_end(struct synth_event_trace_state *trace_state)
  */
 int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
 {
+	unsigned int i, n_u64, len, data_size = 0;
 	struct synth_event_trace_state state;
-	unsigned int i, n_u64;
 	va_list args;
 	int ret;
 
-	ret = __synth_event_trace_start(file, &state);
+	ret = __synth_event_trace_init(file, &state);
 	if (ret) {
 		if (ret == -ENOENT)
 			ret = 0; /* just disabled, not really an error */
 		return ret;
 	}
 
+	if (state.event->n_dynamic_fields) {
+		va_start(args, n_vals);
+
+		for (i = 0; i < state.event->n_fields; i++) {
+			u64 val = va_arg(args, u64);
+
+			if (state.event->fields[i]->is_string &&
+			    state.event->fields[i]->is_dynamic) {
+				char *str_val = (char *)(long)val;
+
+				data_size += strlen(str_val) + 1;
+			}
+		}
+
+		va_end(args);
+	}
+
+	ret = __synth_event_trace_start(file, &state, data_size);
+	if (ret)
+		return ret;
+
 	if (n_vals != state.event->n_fields) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	data_size = 0;
+
 	va_start(args, n_vals);
 	for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
 		u64 val;
@@ -1301,10 +1454,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
 
 		if (state.event->fields[i]->is_string) {
 			char *str_val = (char *)(long)val;
-			char *str_field = (char *)&state.entry->fields[n_u64];
 
-			strscpy(str_field, str_val, STR_VAR_LEN_MAX);
-			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+			len = trace_string(state.entry, state.event, str_val,
+					   state.event->fields[i]->is_dynamic,
+					   data_size, &n_u64);
+			data_size += len; /* only dynamic string increments */
 		} else {
 			struct synth_field *field = state.event->fields[i];
 
@@ -1357,29 +1511,46 @@ EXPORT_SYMBOL_GPL(synth_event_trace);
 int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
 			    unsigned int n_vals)
 {
+	unsigned int i, n_u64, field_pos, len, data_size = 0;
 	struct synth_event_trace_state state;
-	unsigned int i, n_u64;
+	char *str_val;
 	int ret;
 
-	ret = __synth_event_trace_start(file, &state);
+	ret = __synth_event_trace_init(file, &state);
 	if (ret) {
 		if (ret == -ENOENT)
 			ret = 0; /* just disabled, not really an error */
 		return ret;
 	}
 
+	if (state.event->n_dynamic_fields) {
+		for (i = 0; i < state.event->n_dynamic_fields; i++) {
+			field_pos = state.event->dynamic_fields[i]->field_pos;
+			str_val = (char *)(long)vals[field_pos];
+			len = strlen(str_val) + 1;
+			data_size += len;
+		}
+	}
+
+	ret = __synth_event_trace_start(file, &state, data_size);
+	if (ret)
+		return ret;
+
 	if (n_vals != state.event->n_fields) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	data_size = 0;
+
 	for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
 		if (state.event->fields[i]->is_string) {
 			char *str_val = (char *)(long)vals[i];
-			char *str_field = (char *)&state.entry->fields[n_u64];
 
-			strscpy(str_field, str_val, STR_VAR_LEN_MAX);
-			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+			len = trace_string(state.entry, state.event, str_val,
+					   state.event->fields[i]->is_dynamic,
+					   data_size, &n_u64);
+			data_size += len; /* only dynamic string increments */
 		} else {
 			struct synth_field *field = state.event->fields[i];
 			u64 val = vals[i];
@@ -1447,9 +1618,17 @@ int synth_event_trace_start(struct trace_event_file *file,
 	if (!trace_state)
 		return -EINVAL;
 
-	ret = __synth_event_trace_start(file, trace_state);
-	if (ret == -ENOENT)
-		ret = 0; /* just disabled, not really an error */
+	ret = __synth_event_trace_init(file, trace_state);
+	if (ret) {
+		if (ret == -ENOENT)
+			ret = 0; /* just disabled, not really an error */
+		return ret;
+	}
+
+	if (trace_state->event->n_dynamic_fields)
+		return -ENOTSUPP;
+
+	ret = __synth_event_trace_start(file, trace_state, 0);
 
 	return ret;
 }
@@ -1510,6 +1689,11 @@ static int __synth_event_add_val(const char *field_name, u64 val,
 		char *str_val = (char *)(long)val;
 		char *str_field;
 
+		if (field->is_dynamic) { /* add_val can't do dynamic strings */
+			ret = -EINVAL;
+			goto out;
+		}
+
 		if (!str_val) {
 			ret = -EINVAL;
 			goto out;
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index 5166705d1556..6e146b959dcd 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -16,6 +16,8 @@ struct synth_field {
 	unsigned int offset;
 	bool is_signed;
 	bool is_string;
+	bool is_dynamic;
+	bool field_pos;
 };
 
 struct synth_event {
@@ -24,6 +26,8 @@ struct synth_event {
 	char					*name;
 	struct synth_field			**fields;
 	unsigned int				n_fields;
+	struct synth_field			**dynamic_fields;
+	unsigned int				n_dynamic_fields;
 	unsigned int				n_u64;
 	struct trace_event_class		class;
 	struct trace_event_call			call;

From 1bc36bd4a8557285870b34cfec7910871049e93e Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Sun, 4 Oct 2020 17:14:07 -0500
Subject: [PATCH 36/58] tracing: Add README information for synthetic_events
 file

Add an entry with a basic description of events/synthetic_events along
with a simple example.

Link: https://lkml.kernel.org/r/3c7f178cf95aaeebc01eda7d95600dd937233eb7.1601848695.git.zanussi@kernel.org

Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3f2533adae72..73fd0e0c0f39 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5249,7 +5249,12 @@ static const char readme_msg[] =
 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
 	"\t        save(field,...)                      - save current event fields\n"
 #ifdef CONFIG_TRACER_SNAPSHOT
-	"\t        snapshot()                           - snapshot the trace buffer\n"
+	"\t        snapshot()                           - snapshot the trace buffer\n\n"
+#endif
+#ifdef CONFIG_SYNTH_EVENTS
+	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
+	"\t  Write into this file to define/undefine new synthetic events.\n"
+	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
 #endif
 #endif
 ;

From cbcd9c83695e0f24c0497e6db8b73c3819ab08d4 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Sun, 4 Oct 2020 17:14:08 -0500
Subject: [PATCH 37/58] selftests/ftrace: Add test case for synthetic event
 dynamic strings

Add a selftest that defines and traces a synthetic event that uses a
dynamic string event field.

Link: https://lkml.kernel.org/r/74445afb005046d76d59fb06696a2ceaa164dec9.1601848695.git.zanussi@kernel.org

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../trigger-synthetic-event-dynstring.tc      | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc

diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
new file mode 100644
index 000000000000..3d65c856eca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+echo "Test create synthetic event"
+
+echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events
+if [ ! -d events/synthetic/ping_test_latency ]; then
+    fail "Failed to create ping_test_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event using trace action and dynamic strings"
+echo "Test histogram dynamic string variables,simple expression support and trace action"
+
+echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger
+echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger
+echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger
+
+ping $LOCALHOST -c 5
+
+if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then
+    fail "Failed to create dynamic string trace action inter-event histogram"
+fi
+
+exit 0

From 8db4d6bfbbf9206567fd529dc73dc058b3929db0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 4 Oct 2020 17:14:09 -0500
Subject: [PATCH 38/58] tracing: Change synthetic event string format to limit
 printed length

Change the format for printing synthetic field strings to limit the
length of the string printed even if it's not correctly terminated.

Link: https://lore.kernel.org/r/20201002210036.0200371b@oasis.local.home
Link: https://lkml.kernel.org/r/b6bdb34e70d970e8026daa3503db6b8e5cdad524.1601848695.git.zanussi@kernel.org

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 24bc6d61aa40..742ce5f62d6d 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -234,7 +234,7 @@ static const char *synth_field_fmt(char *type)
 	else if (strcmp(type, "gfp_t") == 0)
 		fmt = "%x";
 	else if (synth_field_is_string(type))
-		fmt = "%s";
+		fmt = "%.*s";
 
 	return fmt;
 }
@@ -303,11 +303,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 				str_field = (char *)entry + data_offset;
 
 				trace_seq_printf(s, print_fmt, se->fields[i]->name,
+						 STR_VAR_LEN_MAX,
 						 str_field,
 						 i == se->n_fields - 1 ? "" : " ");
 				n_u64++;
 			} else {
 				trace_seq_printf(s, print_fmt, se->fields[i]->name,
+						 STR_VAR_LEN_MAX,
 						 (char *)&entry->fields[n_u64],
 						 i == se->n_fields - 1 ? "" : " ");
 				n_u64 += STR_VAR_LEN_MAX / sizeof(u64);

From 59e65b3358f44d4d0134eca3b6c269f359f21cd5 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@linux.alibaba.com>
Date: Mon, 31 Aug 2020 11:11:00 +0800
Subject: [PATCH 39/58] ftrace: Use fls() to get the bits for dup_hash()

The effect here is to get the number of bits, lets use fls() to do
this job.

Link: https://lkml.kernel.org/r/20200831031104.23322-3-richard.weiyang@linux.alibaba.com

Signed-off-by: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 123d520b9261..5633d37d8806 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1370,8 +1370,9 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
 	/*
 	 * Make the hash size about 1/2 the # found
 	 */
-	for (size /= 2; size; size >>= 1)
-		bits++;
+	bits = fls(size);
+	if (bits)
+		bits--;
 
 	/* Don't allocate too much */
 	if (bits > FTRACE_HASH_MAX_BITS)

From be49313273211c47d1b317d6b2dbe02637c2794c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 5 Oct 2020 20:21:14 -0400
Subject: [PATCH 40/58] ftrace: Simplify the hash calculation

No need to add a check to subtract the number of bits if bits is zero after
fls(). Just divide the size by two before calling it. This does give the
same answer for size of 0 and 1, but that's fine.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5633d37d8806..c51a91aea1fd 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1368,11 +1368,10 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
 	int i;
 
 	/*
-	 * Make the hash size about 1/2 the # found
+	 * Use around half the size (max bit of it), but
+	 * a minimum of 2 is fine (as size of 0 or 1 both give 1 for bits).
 	 */
-	bits = fls(size);
-	if (bits)
-		bits--;
+	bits = fls(size / 2);
 
 	/* Don't allocate too much */
 	if (bits > FTRACE_HASH_MAX_BITS)

From 02dae28f0b542969e44cbc1e14ffc9944cd2975c Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@linux.alibaba.com>
Date: Mon, 31 Aug 2020 11:11:01 +0800
Subject: [PATCH 41/58] ftrace: Simplify the dyn_ftrace->flags macro

All the three macro are defined to be used for ftrace_rec_count(). This
can be achieved by (flags & FTRACE_REF_MAX) directly.

Since no other places would use those macros, remove them for clarity.

Also it fixes a typo in the comment.

Link: https://lkml.kernel.org/r/20200831031104.23322-4-richard.weiyang@linux.alibaba.com

Signed-off-by: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e5c2d5cc6e6a..b1f56e3410dc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -432,7 +432,7 @@ bool is_ftrace_trampoline(unsigned long addr);
  *  DIRECT   - there is a direct function to call
  *
  * When a new ftrace_ops is registered and wants a function to save
- * pt_regs, the rec->flag REGS is set. When the function has been
+ * pt_regs, the rec->flags REGS is set. When the function has been
  * set up to save regs, the REG_EN flag is set. Once a function
  * starts saving regs it will do so until all ftrace_ops are removed
  * from tracing that function.
@@ -450,12 +450,9 @@ enum {
 };
 
 #define FTRACE_REF_MAX_SHIFT	23
-#define FTRACE_FL_BITS		9
-#define FTRACE_FL_MASKED_BITS	((1UL << FTRACE_FL_BITS) - 1)
-#define FTRACE_FL_MASK		(FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT)
 #define FTRACE_REF_MAX		((1UL << FTRACE_REF_MAX_SHIFT) - 1)
 
-#define ftrace_rec_count(rec)	((rec)->flags & ~FTRACE_FL_MASK)
+#define ftrace_rec_count(rec)	((rec)->flags & FTRACE_REF_MAX)
 
 struct dyn_ftrace {
 	unsigned long		ip; /* address of mcount call-site */

From b40c6eabfcd409e022fcb377ac268e3ef9446fde Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@linux.alibaba.com>
Date: Mon, 31 Aug 2020 11:11:02 +0800
Subject: [PATCH 42/58] ftrace: Simplify the calculation of page number for
 ftrace_page->records

Based on the following two reasones, we could simplify the calculation:

  - If the number after roundup count is not power of 2, we would
    definitely have more than 1 empty page with a higher order.
  - get_count_order() just return current order, so one lower order
    could meet the requirement.

The calculation could be simplified by lower one order level when pages
are not power of 2.

Link: https://lkml.kernel.org/r/20200831031104.23322-5-richard.weiyang@linux.alibaba.com

Signed-off-by: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c51a91aea1fd..c3be18b4f27b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3129,18 +3129,19 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 static int ftrace_allocate_records(struct ftrace_page *pg, int count)
 {
 	int order;
-	int cnt;
+	int pages, cnt;
 
 	if (WARN_ON(!count))
 		return -EINVAL;
 
-	order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
+	pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
+	order = get_count_order(pages);
 
 	/*
 	 * We want to fill as much as possible. No more than a page
 	 * may be empty.
 	 */
-	while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
+	if (!is_power_of_2(pages))
 		order--;
 
  again:

From 7ba031e8b74c6aa156a0d9867dc13cf817d52047 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 5 Oct 2020 20:37:41 -0400
Subject: [PATCH 43/58] ftrace: Format variable declarations of
 ftrace_allocate_records

I hate when unrelated variables are declared on the same line.
Split them.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c3be18b4f27b..4833b6a82ce7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3129,7 +3129,8 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 static int ftrace_allocate_records(struct ftrace_page *pg, int count)
 {
 	int order;
-	int pages, cnt;
+	int pages;
+	int cnt;
 
 	if (WARN_ON(!count))
 		return -EINVAL;

From 40dc4a42b97ef5a52ef34a73093a7992faaab15e Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@linux.alibaba.com>
Date: Mon, 31 Aug 2020 11:11:04 +0800
Subject: [PATCH 44/58] ftrace: ftrace_global_list is renamed to
 ftrace_ops_list

Fix the comment to comply with the code.

Link: https://lkml.kernel.org/r/20200831031104.23322-7-richard.weiyang@linux.alibaba.com

Signed-off-by: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b1f56e3410dc..1bd3a0356ae4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -217,11 +217,11 @@ extern struct ftrace_ops __rcu *ftrace_ops_list;
 extern struct ftrace_ops ftrace_list_end;
 
 /*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
+ * Traverse the ftrace_ops_list, invoking all entries.  The reason that we
  * can use rcu_dereference_raw_check() is that elements removed from this list
  * are simply leaked, so there is no need to interact with a grace-period
  * mechanism.  The rcu_dereference_raw_check() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
+ * concurrent insertions into the ftrace_ops_list.
  *
  * Silly Alpha and silly pointer-speculation compiler optimizations!
  */

From 43aa422c0c07135236bd91cbb45b048fd85e73b5 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Wed, 30 Sep 2020 19:43:03 +0100
Subject: [PATCH 45/58] tracing: Remove a pointless assignment

The variable 'len' has been assigned a value but is not used after that.
So, remove the assignement.

Link: https://lkml.kernel.org/r/20200930184303.22896-1-sudipm.mukherjee@gmail.com

Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73fd0e0c0f39..0806fa9f2815 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6667,7 +6667,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 		written = -EFAULT;
 	} else
 		written = cnt;
-	len = cnt;
 
 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
 		/* do not add \n before testing triggers, but add \0 */

From 848183553e431e6e9c2ea2f72421a7a1bbc6532e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 7 Oct 2020 10:34:34 -0400
Subject: [PATCH 46/58] tracing: Fix synthetic print fmt check for use of
 __get_str()

A cut and paste error had the check to use __get_str() test "is_dynamic"
twice, instead of checking "is_string && is_dynamic".

Link: https://lore.kernel.org/r/d34dccd5-96ba-a2d9-46ea-de8807525deb@canonical.com

Reported-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 742ce5f62d6d..3b2dcc42b8ee 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -491,7 +491,7 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
 
 	for (i = 0; i < event->n_fields; i++) {
-		if (event->fields[i]->is_dynamic &&
+		if (event->fields[i]->is_string &&
 		    event->fields[i]->is_dynamic)
 			pos += snprintf(buf + pos, LEN_OR_ZERO,
 				", __get_str(%s)", event->fields[i]->name);

From 6d9bd139455d9d40fec8c242985996468b34180c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 13 Oct 2020 15:48:52 -0400
Subject: [PATCH 47/58] tracing: Check return value of __create_val_fields()
 before using its result

After having a typo for writing a histogram trigger.

Wrote:
  echo 'hist:key=pid:ts=common_timestamp.usec' > events/sched/sched_waking/trigger

Instead of:
  echo 'hist:key=pid:ts=common_timestamp.usecs' > events/sched/sched_waking/trigger

and the following crash happened:

 BUG: kernel NULL pointer dereference, address: 0000000000000008
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 0 P4D 0
 Oops: 0000 [#1] PREEMPT SMP PTI
 CPU: 4 PID: 1641 Comm: sh Not tainted 5.9.0-rc5-test+ #549
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
 RIP: 0010:event_hist_trigger_func+0x70b/0x1ee0
 Code: 24 08 89 d5 49 89 cc e9 8c 00 00 00 4c 89 f2 41 b9 00 10 00 00 4c 89 e1 44 89 ee 4c 89 ff e8 dc d3 ff ff 45 89 ea 4b 8b 14 d7 <f6> 42 08 04 74 17 41 8b 8f c0 00 00 00 8d 71 01 41 89 b7 c0 00 00
 RSP: 0018:ffff959213d53db0 EFLAGS: 00010202
 RAX: ffffffffffffffea RBX: 0000000000000000 RCX: 0000000000084c04
 RDX: 0000000000000000 RSI: df7326aefebd174c RDI: 0000000000031080
 RBP: 0000000000000002 R08: 0000000000000001 R09: 0000000000000001
 R10: 0000000000000001 R11: 0000000000000046 R12: ffff959211dcf690
 R13: 0000000000000001 R14: ffff95925a36e370 R15: ffff959251c89800
 FS:  00007fb9ea934740(0000) GS:ffff95925ab00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000008 CR3: 00000000c976c005 CR4: 00000000001706e0
 Call Trace:
  ? trigger_process_regex+0x78/0x110
  trigger_process_regex+0xc5/0x110
  event_trigger_write+0x71/0xd0
  vfs_write+0xca/0x210
  ksys_write+0x70/0xf0
  do_syscall_64+0x33/0x40
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7fb9eaa29487
 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24

This was caused by accessing the hlist_data fields after the call to
__create_val_fields() without checking if the creation succeed.

Link: https://lkml.kernel.org/r/20201013154852.3abd8702@gandalf.local.home

Fixes: 63a1e5de3006 ("tracing: Save normal string variables")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index c74a7d157306..96c3f86b81c5 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3687,7 +3687,7 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 
 	ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
 
-	if (hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
+	if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
 		hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++;
 
 	return ret;

From bbeb97464eefc65f506084fd9f18f21653e01137 Mon Sep 17 00:00:00 2001
From: Gaurav Kohli <gkohli@codeaurora.org>
Date: Tue, 6 Oct 2020 15:03:53 +0530
Subject: [PATCH 48/58] tracing: Fix race in trace_open and buffer resize call

Below race can come, if trace_open and resize of
cpu buffer is running parallely on different cpus
CPUX                                CPUY
				    ring_buffer_resize
				    atomic_read(&buffer->resize_disabled)
tracing_open
tracing_reset_online_cpus
ring_buffer_reset_cpu
rb_reset_cpu
				    rb_update_pages
				    remove/insert pages
resetting pointer

This race can cause data abort or some times infinte loop in
rb_remove_pages and rb_insert_pages while checking pages
for sanity.

Take buffer lock to fix this.

Link: https://lkml.kernel.org/r/1601976833-24377-1-git-send-email-gkohli@codeaurora.org

Cc: stable@vger.kernel.org
Fixes: b23d7a5f4a07a ("ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU")
Signed-off-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 93ef0ab6ea20..15bf28b13e50 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4866,6 +4866,9 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
+	/* prevent another thread from changing buffer sizes */
+	mutex_lock(&buffer->mutex);
+
 	atomic_inc(&cpu_buffer->resize_disabled);
 	atomic_inc(&cpu_buffer->record_disabled);
 
@@ -4876,6 +4879,8 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
 
 	atomic_dec(&cpu_buffer->record_disabled);
 	atomic_dec(&cpu_buffer->resize_disabled);
+
+	mutex_unlock(&buffer->mutex);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
@@ -4889,6 +4894,9 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu;
 
+	/* prevent another thread from changing buffer sizes */
+	mutex_lock(&buffer->mutex);
+
 	for_each_online_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
 
@@ -4907,6 +4915,8 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
 		atomic_dec(&cpu_buffer->record_disabled);
 		atomic_dec(&cpu_buffer->resize_disabled);
 	}
+
+	mutex_unlock(&buffer->mutex);
 }
 
 /**

From c16340971949ba6560c8e7c985bad8a180c57aa3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sun, 11 Oct 2020 00:28:09 +0900
Subject: [PATCH 49/58] tracing/boot: Add ftrace.instance.*.alloc_snapshot
 option

Add ftrace.instance.*.alloc_snapshot option.

This option has been described in Documentation/trace/boottime-trace.rst
but not implemented yet.

ftrace.[instance.INSTANCE.]alloc_snapshot
   Allocate snapshot buffer.

The difference from kernel.alloc_snapshot is that the kernel.alloc_snapshot
will allocate the buffer only for the main instance, but this can allocate
buffer for any new instances.

Link: https://lkml.kernel.org/r/160234368948.400560.15313384470765915015.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_boot.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 754e3cf2df3a..c22a152ef0b4 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -284,6 +284,12 @@ trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node)
 		if (tracing_set_tracer(tr, p) < 0)
 			pr_err("Failed to set given tracer: %s\n", p);
 	}
+
+	/* Since tracer can free snapshot buffer, allocate snapshot here.*/
+	if (xbc_node_find_value(node, "alloc_snapshot", NULL)) {
+		if (tracing_alloc_snapshot_instance(tr) < 0)
+			pr_err("Failed to allocate snapshot buffer\n");
+	}
 }
 
 static void __init

From 499f7bb0853570c5d9cbf2a2ecbed517852cacfa Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Sat, 10 Oct 2020 22:09:24 +0800
Subject: [PATCH 50/58] tracing: Fix some typos in comments

s/wihin/within/
s/retrieven/retrieved/
s/suppport/support/
s/wil/will/
s/accidently/accidentally/
s/if the if the/if the/

Link: https://lkml.kernel.org/r/20201010140924.3809-1-hqjagain@gmail.com

Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 ++--
 kernel/trace/trace.h | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0806fa9f2815..63c97012ed39 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9465,7 +9465,7 @@ __init static int tracer_alloc_buffers(void)
 	}
 
 	/*
-	 * Make sure we don't accidently add more trace options
+	 * Make sure we don't accidentally add more trace options
 	 * than we have bits for.
 	 */
 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
@@ -9494,7 +9494,7 @@ __init static int tracer_alloc_buffers(void)
 
 	/*
 	 * The prepare callbacks allocates some memory for the ring buffer. We
-	 * don't free the buffer if the if the CPU goes down. If we were to free
+	 * don't free the buffer if the CPU goes down. If we were to free
 	 * the buffer, then the user would lose any trace that was in the
 	 * buffer. The memory will be removed once the "instance" is removed.
 	 */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5b0e797cacdd..f777bb68e660 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -246,7 +246,7 @@ typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
  * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is
  * passed in turn to the cond_snapshot.update() function.  That data
  * can be compared by the update() implementation with the cond_data
- * contained wihin the struct cond_snapshot instance associated with
+ * contained within the struct cond_snapshot instance associated with
  * the trace_array.  Because the tr->max_lock is held throughout the
  * update() call, the update() function can directly retrieve the
  * cond_snapshot and cond_data associated with the per-instance
@@ -271,7 +271,7 @@ typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
  *	take the snapshot, by returning 'true' if so, 'false' if no
  *	snapshot should be taken.  Because the max_lock is held for
  *	the duration of update(), the implementation is safe to
- *	directly retrieven and save any implementation data it needs
+ *	directly retrieved and save any implementation data it needs
  *	to in association with the snapshot.
  */
 struct cond_snapshot {
@@ -573,7 +573,7 @@ struct tracer {
  *   The function callback, which can use the FTRACE bits to
  *    check for recursion.
  *
- * Now if the arch does not suppport a feature, and it calls
+ * Now if the arch does not support a feature, and it calls
  * the global list function which calls the ftrace callback
  * all three of these steps will do a recursion protection.
  * There's no reason to do one if the previous caller already
@@ -1479,7 +1479,7 @@ __trace_event_discard_commit(struct trace_buffer *buffer,
 /*
  * Helper function for event_trigger_unlock_commit{_regs}().
  * If there are event triggers attached to this event that requires
- * filtering against its fields, then they wil be called as the
+ * filtering against its fields, then they will be called as the
  * entry already holds the field information of the current event.
  *
  * It also checks if the event should be discarded or not.

From 7d27adf575e7e917666f4eeca62188353a358060 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:52 -0500
Subject: [PATCH 51/58] tracing: Don't show dynamic string internals in
 synthetic event description

For synthetic event dynamic fields, the type contains "__data_loc",
which is basically an internal part of the type which is only meant to
be displayed in the format, not in the event description itself, which
is confusing to users since they can't use __data_loc on the
command-line to define an event field, which printing it would lead
them to believe.

So filter it out from the description, while leaving it in the type.

Link: https://lkml.kernel.org/r/b3b7baf7813298a5ede4ff02e2e837b91c05a724.1602598160.git.zanussi@kernel.org

Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 3b2dcc42b8ee..b19e2f4159ab 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1867,14 +1867,22 @@ static int __synth_event_show(struct seq_file *m, struct synth_event *event)
 {
 	struct synth_field *field;
 	unsigned int i;
+	char *type, *t;
 
 	seq_printf(m, "%s\t", event->name);
 
 	for (i = 0; i < event->n_fields; i++) {
 		field = event->fields[i];
 
+		type = field->type;
+		t = strstr(type, "__data_loc");
+		if (t) { /* __data_loc belongs in format but not event desc */
+			t += sizeof("__data_loc");
+			type = t;
+		}
+
 		/* parameter values */
-		seq_printf(m, "%s %s%s", field->type, field->name,
+		seq_printf(m, "%s %s%s", type, field->name,
 			   i == event->n_fields - 1 ? "" : "; ");
 	}
 

From 42d120e2dda5724ea789461413b8691abc315ba1 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:53 -0500
Subject: [PATCH 52/58] tracing: Move is_good_name() from trace_probe.h to
 trace.h

is_good_name() is useful for other trace infrastructure, such as
synthetic events, so make it available via trace.h.

Link: https://lkml.kernel.org/r/cc6d6a2d7da6957fcbe1e2922e76d18d2bb459b4.1602598160.git.zanussi@kernel.org

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.h       | 13 +++++++++++++
 kernel/trace/trace_probe.h | 13 -------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f777bb68e660..34e0c4d5a6e7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -19,6 +19,7 @@
 #include <linux/glob.h>
 #include <linux/irq_work.h>
 #include <linux/workqueue.h>
+#include <linux/ctype.h>
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 #include <asm/unistd.h>		/* For NR_SYSCALLS	     */
@@ -2090,4 +2091,16 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
 	iter->pos = -1;
 }
 
+/* Check the name is good for event/group/fields */
+static inline bool is_good_name(const char *name)
+{
+	if (!isalpha(*name) && *name != '_')
+		return false;
+	while (*++name != '\0') {
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+			return false;
+	}
+	return true;
+}
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 04d00987da69..2f703a20c724 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -16,7 +16,6 @@
 #include <linux/tracefs.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <linux/ctype.h>
 #include <linux/ptrace.h>
 #include <linux/perf_event.h>
 #include <linux/kprobes.h>
@@ -348,18 +347,6 @@ bool trace_probe_match_command_args(struct trace_probe *tp,
 #define trace_probe_for_each_link_rcu(pos, tp)	\
 	list_for_each_entry_rcu(pos, &(tp)->event->files, list)
 
-/* Check the name is good for event/group/fields */
-static inline bool is_good_name(const char *name)
-{
-	if (!isalpha(*name) && *name != '_')
-		return false;
-	while (*++name != '\0') {
-		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
-			return false;
-	}
-	return true;
-}
-
 #define TPARG_FL_RETURN BIT(0)
 #define TPARG_FL_KERNEL BIT(1)
 #define TPARG_FL_FENTRY BIT(2)

From 9bbb33291f8e44819aaed32d367f702303ff663e Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:54 -0500
Subject: [PATCH 53/58] tracing: Check that the synthetic event and field names
 are legal

Call the is_good_name() function used by probe events to make sure
synthetic event and field names don't contain illegal characters and
cause unexpected parsing of synthetic event commands.

Link: https://lkml.kernel.org/r/c4d4bb59d3ac39bcbd70fba0cf837d6b1cedb015.1602598160.git.zanussi@kernel.org

Fixes: 4b147936fa50 (tracing: Add support for 'synthetic' events)
Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index b19e2f4159ab..8c9d6e464da0 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -572,6 +572,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 		ret = -ENOMEM;
 		goto free;
 	}
+	if (!is_good_name(field->name)) {
+		ret = -EINVAL;
+		goto free;
+	}
 
 	if (field_type[0] == ';')
 		field_type++;
@@ -1112,6 +1116,11 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
 
 	mutex_lock(&event_mutex);
 
+	if (!is_good_name(name)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	event = find_synth_event(name);
 	if (event) {
 		ret = -EEXIST;

From d4d704637d935ef5e588b0610b647376dd9f37d4 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:55 -0500
Subject: [PATCH 54/58] tracing: Add synthetic event error logging

Add support for synthetic event error logging, which entails adding a
logging function for it, a way to save the synthetic event command,
and a set of specific synthetic event parse error strings and
handling.

Link: https://lkml.kernel.org/r/ed099c66df13b40cfc633aaeb17f66c37a923066.1602598160.git.zanussi@kernel.org

[ <rostedt@goodmis.org>: wrote save_cmdstr() seq_buf implementation. ]
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 92 ++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 8c9d6e464da0..f77851018121 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -20,6 +20,48 @@
 
 #include "trace_synth.h"
 
+#undef ERRORS
+#define ERRORS	\
+	C(BAD_NAME,		"Illegal name"),		\
+	C(CMD_INCOMPLETE,	"Incomplete command"),		\
+	C(EVENT_EXISTS,		"Event already exists"),	\
+	C(TOO_MANY_FIELDS,	"Too many fields"),		\
+	C(INCOMPLETE_TYPE,	"Incomplete type"),		\
+	C(INVALID_TYPE,		"Invalid type"),		\
+	C(INVALID_FIELD,	"Invalid field"),		\
+	C(CMD_TOO_LONG,		"Command too long"),
+
+#undef C
+#define C(a, b)		SYNTH_ERR_##a
+
+enum { ERRORS };
+
+#undef C
+#define C(a, b)		b
+
+static const char *err_text[] = { ERRORS };
+
+static char last_cmd[MAX_FILTER_STR_VAL];
+
+static int errpos(const char *str)
+{
+	return err_pos(last_cmd, str);
+}
+
+static void last_cmd_set(char *str)
+{
+	if (!str)
+		return;
+
+	strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1);
+}
+
+static void synth_err(u8 err_type, u8 err_pos)
+{
+	tracing_log_err(NULL, "synthetic_events", last_cmd, err_text,
+			err_type, err_pos);
+}
+
 static int create_synth_event(int argc, const char **argv);
 static int synth_event_show(struct seq_file *m, struct dyn_event *ev);
 static int synth_event_release(struct dyn_event *ev);
@@ -545,8 +587,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 		field_type++;
 
 	if (!strcmp(field_type, "unsigned")) {
-		if (argc < 3)
+		if (argc < 3) {
+			synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type));
 			return ERR_PTR(-EINVAL);
+		}
 		prefix = "unsigned ";
 		field_type = argv[1];
 		field_name = argv[2];
@@ -573,6 +617,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 		goto free;
 	}
 	if (!is_good_name(field->name)) {
+		synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name));
 		ret = -EINVAL;
 		goto free;
 	}
@@ -601,6 +646,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 
 	size = synth_field_size(field->type);
 	if (size < 0) {
+		synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type));
 		ret = -EINVAL;
 		goto free;
 	} else if (size == 0) {
@@ -621,6 +667,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 			field->is_dynamic = true;
 			size = sizeof(u64);
 		} else {
+			synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type));
 			ret = -EINVAL;
 			goto free;
 		}
@@ -1098,12 +1145,47 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name,
 }
 EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start);
 
+static int save_cmdstr(int argc, const char *name, const char **argv)
+{
+	struct seq_buf s;
+	char *buf;
+	int i;
+
+	buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN);
+
+	seq_buf_puts(&s, name);
+
+	for (i = 0; i < argc; i++) {
+		seq_buf_putc(&s, ' ');
+		seq_buf_puts(&s, argv[i]);
+	}
+
+	if (!seq_buf_buffer_left(&s)) {
+		synth_err(SYNTH_ERR_CMD_TOO_LONG, 0);
+		kfree(buf);
+		return -EINVAL;
+	}
+	buf[s.len] = 0;
+	last_cmd_set(buf);
+
+	kfree(buf);
+	return 0;
+}
+
 static int __create_synth_event(int argc, const char *name, const char **argv)
 {
 	struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
 	struct synth_event *event = NULL;
 	int i, consumed = 0, n_fields = 0, ret = 0;
 
+	ret = save_cmdstr(argc, name, argv);
+	if (ret)
+		return ret;
+
 	/*
 	 * Argument syntax:
 	 *  - Add synthetic event: <event_name> field[;field] ...
@@ -1111,18 +1193,22 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
 	 *      where 'field' = type field_name
 	 */
 
-	if (name[0] == '\0' || argc < 1)
+	if (name[0] == '\0' || argc < 1) {
+		synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0);
 		return -EINVAL;
+	}
 
 	mutex_lock(&event_mutex);
 
 	if (!is_good_name(name)) {
+		synth_err(SYNTH_ERR_BAD_NAME, errpos(name));
 		ret = -EINVAL;
 		goto out;
 	}
 
 	event = find_synth_event(name);
 	if (event) {
+		synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name));
 		ret = -EEXIST;
 		goto out;
 	}
@@ -1131,6 +1217,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
 		if (strcmp(argv[i], ";") == 0)
 			continue;
 		if (n_fields == SYNTH_FIELDS_MAX) {
+			synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0);
 			ret = -EINVAL;
 			goto err;
 		}
@@ -1145,6 +1232,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
 	}
 
 	if (i < argc && strcmp(argv[i], ";") != 0) {
+		synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i]));
 		ret = -EINVAL;
 		goto err;
 	}

From 96378b2088faea68f1fb05ea6b9a566fc569a44c Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:56 -0500
Subject: [PATCH 55/58] selftests/ftrace: Change synthetic event name for
 inter-event-combined test

This test uses waking+wakeup_latency as an event name, which doesn't
make sense since it includes an operator.  Illegal names are now
detected by the synthetic event command parsing, which causes this
test to fail.  Change the name to 'waking_plus_wakeup_latency' to
prevent this.

Link: https://lkml.kernel.org/r/a1ee2f76ff28ef7166fb788ca8be968887808920.1602598160.git.zanussi@kernel.org

Fixes: f06eec4d0f2c (selftests: ftrace: Add inter-event hist triggers testcases)
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../inter-event/trigger-inter-event-combined-hist.tc      | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index 7449a4b8f1f9..9098f1e7433f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
 echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
 
-echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
-echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
-echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger
 
 ping $LOCALHOST -c 3
-if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then
     fail "Failed to create combined histogram"
 fi
 

From 10819e25799aae564005b6049a45e9808797b3bb Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:57 -0500
Subject: [PATCH 56/58] tracing: Handle synthetic event array field type
 checking correctly

Since synthetic event array types are derived from the field name,
there may be a semicolon at the end of the type which should be
stripped off.

If there are more characters following that, normal type string
checking will result in an invalid type.

Without this patch, you can end up with an invalid field type string
that gets displayed in both the synthetic event description and the
event format:

Before:

  # echo 'myevent char str[16]; int v' >> synthetic_events
  # cat synthetic_events
    myevent	char[16]; str; int v

  name: myevent
  ID: 1936
  format:
  	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
  	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
  	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
  	field:int common_pid;	offset:4;	size:4;	signed:1;

  	field:char str[16];;	offset:8;	size:16;	signed:1;
  	field:int v;	offset:40;	size:4;	signed:1;

  print fmt: "str=%s, v=%d", REC->str, REC->v

After:

  # echo 'myevent char str[16]; int v' >> synthetic_events
  # cat synthetic_events
    myevent	char[16] str; int v

  # cat events/synthetic/myevent/format
  name: myevent
  ID: 1936
  format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;

	field:char str[16];	offset:8;	size:16;	signed:1;
	field:int v;	offset:40;	size:4;	signed:1;

  print fmt: "str=%s, v=%d", REC->str, REC->v

Link: https://lkml.kernel.org/r/6587663b56c2d45ab9d8c8472a2110713cdec97d.1602598160.git.zanussi@kernel.org

[ <rostedt@goodmis.org>: wrote parse_synth_field() snippet. ]
Fixes: 4b147936fa50 (tracing: Add support for 'synthetic' events)
Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index f77851018121..d239f0e2af8f 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -174,7 +174,7 @@ static int synth_field_string_size(char *type)
 	start += sizeof("char[") - 1;
 
 	end = strchr(type, ']');
-	if (!end || end < start)
+	if (!end || end < start || type + strlen(type) > end + 1)
 		return -EINVAL;
 
 	len = end - start;
@@ -625,8 +625,14 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
 	if (field_type[0] == ';')
 		field_type++;
 	len = strlen(field_type) + 1;
-	if (array)
-		len += strlen(array);
+
+        if (array) {
+                int l = strlen(array);
+
+                if (l && array[l - 1] == ';')
+                        l--;
+                len += l;
+        }
 	if (prefix)
 		len += strlen(prefix);
 

From 81ff92a93d954efa29e1d514934d6a47cd5f4558 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Tue, 13 Oct 2020 09:17:58 -0500
Subject: [PATCH 57/58] selftests/ftrace: Add test case for synthetic event
 syntax errors

Add a selftest that verifies that the syntax error messages and caret
positions are correct for most of the possible synthetic event syntax
error cases.

Link: https://lkml.kernel.org/r/af611928ce79f86eaf0af8654f1d7802d5cc21ff.1602598160.git.zanussi@kernel.org

Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../trigger-synthetic_event_syntax_errors.tc  | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc

diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
new file mode 100644
index 000000000000..ada594fe16cb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser errors
+# requires: synthetic_events error_log
+
+check_error() { # command-with-error-pos-by-^
+    ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
+}
+
+check_error 'myevent ^chr arg'			# INVALID_TYPE
+check_error 'myevent ^char str[];; int v'	# INVALID_TYPE
+check_error 'myevent char ^str]; int v'		# INVALID_NAME
+check_error 'myevent char ^str;[]'		# INVALID_NAME
+check_error 'myevent ^char str[; int v'		# INVALID_TYPE
+check_error '^mye;vent char str[]'		# BAD_NAME
+check_error 'myevent char str[]; ^int'		# INVALID_FIELD
+check_error '^myevent'				# INCOMPLETE_CMD
+
+exit 0

From 6107742d15832011cd0396d821f3225b52551f1f Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Fri, 9 Oct 2020 15:05:23 -0700
Subject: [PATCH 58/58] tracing: support "bool" type in synthetic trace events

It's common [1] to define tracepoint fields as "bool" when they contain
a true / false value. Currently, defining a synthetic event with a
"bool" field yields EINVAL. It's possible to work around this by using
e.g. u8 (assuming sizeof(bool) is 1, and bool is unsigned; if either of
these properties don't match, you get EINVAL [2]).

Supporting "bool" explicitly makes hooking this up easier and more
portable for userspace.

[1]: grep -r "bool" include/trace/events/
[2]: check_synth_field() in kernel/trace/trace_events_hist.c

Link: https://lkml.kernel.org/r/20201009220524.485102-2-axelrasmussen@google.com

Acked-by: Michel Lespinasse <walken@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index d239f0e2af8f..3212e2c653b3 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -229,6 +229,8 @@ static int synth_field_size(char *type)
 		size = sizeof(long);
 	else if (strcmp(type, "unsigned long") == 0)
 		size = sizeof(unsigned long);
+	else if (strcmp(type, "bool") == 0)
+		size = sizeof(bool);
 	else if (strcmp(type, "pid_t") == 0)
 		size = sizeof(pid_t);
 	else if (strcmp(type, "gfp_t") == 0)
@@ -271,6 +273,8 @@ static const char *synth_field_fmt(char *type)
 		fmt = "%ld";
 	else if (strcmp(type, "unsigned long") == 0)
 		fmt = "%lu";
+	else if (strcmp(type, "bool") == 0)
+		fmt = "%d";
 	else if (strcmp(type, "pid_t") == 0)
 		fmt = "%d";
 	else if (strcmp(type, "gfp_t") == 0)