Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side changes:

   - Ftrace is one of the last W^X violators (after this only KLP is
     left). These patches move it over to the generic text_poke()
     interface and thereby get rid of this oddity. This requires a
     surprising amount of surgery, by Peter Zijlstra.

   - x86/AMD PMUs: add support for 'Large Increment per Cycle Events' to
     count certain types of events that have a special, quirky hw ABI
     (by Kim Phillips)

   - kprobes fixes by Masami Hiramatsu

  Lots of tooling updates as well, the following subcommands were
  updated: annotate/report/top, c2c, clang, record, report/top TUI,
  sched timehist, tests; plus updates were done to the gtk ui, libperf,
  headers and the parser"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (57 commits)
  perf/x86/amd: Add support for Large Increment per Cycle Events
  perf/x86/amd: Constrain Large Increment per Cycle events
  perf/x86/intel/rapl: Add Comet Lake support
  tracing: Initialize ret in syscall_enter_define_fields()
  perf header: Use last modification time for timestamp
  perf c2c: Fix return type for histogram sorting comparision functions
  perf beauty sockaddr: Fix augmented syscall format warning
  perf/ui/gtk: Fix gtk2 build
  perf ui gtk: Add missing zalloc object
  perf tools: Use %define api.pure full instead of %pure-parser
  libperf: Setup initial evlist::all_cpus value
  perf report: Fix no libunwind compiled warning break s390 issue
  perf tools: Support --prefix/--prefix-strip
  perf report: Clarify in help that --children is default
  tools build: Fix test-clang.cpp with Clang 8+
  perf clang: Fix build with Clang 9
  kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic
  tools lib: Fix builds when glibc contains strlcpy()
  perf report/top: Make 'e' visible in the help and make it toggle showing callchains
  perf report/top: Do not offer annotation for symbols without samples
  ...
This commit is contained in:
Linus Torvalds 2020-01-28 09:44:15 -08:00
commit c0e809e244
128 changed files with 2526 additions and 1602 deletions

View File

@ -53,8 +53,8 @@ obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o
obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
# Main staffs in KPROBES are in arch/arm/probes/ .

View File

@ -22,6 +22,7 @@
#include <asm/ftrace.h>
#include <asm/insn.h>
#include <asm/set_memory.h>
#include <asm/patch.h>
#ifdef CONFIG_THUMB2_KERNEL
#define NOP 0xf85deb04 /* pop.w {lr} */
@ -35,9 +36,7 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
set_kernel_text_rw();
ftrace_modify_all_code(*command);
set_kernel_text_ro();
return 0;
}
@ -59,13 +58,11 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
int ftrace_arch_code_modify_prepare(void)
{
set_all_modules_text_rw();
return 0;
}
int ftrace_arch_code_modify_post_process(void)
{
set_all_modules_text_ro();
/* Make sure any TLB misses during machine stop are cleared. */
flush_tlb_all();
return 0;
@ -97,10 +94,7 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
return -EINVAL;
}
if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
return -EPERM;
flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
__patch_text((void *)pc, new);
return 0;
}

View File

@ -89,18 +89,6 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
int ftrace_arch_code_modify_prepare(void)
{
set_all_modules_text_rw();
return 0;
}
int ftrace_arch_code_modify_post_process(void)
{
set_all_modules_text_ro();
return 0;
}
static unsigned long gen_sethi_insn(unsigned long addr)
{
unsigned long opcode = 0x46000000;

View File

@ -14,6 +14,10 @@
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
static unsigned long perf_nmi_window;
/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@ -301,6 +305,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
return offset;
}
/*
* AMD64 events are detected based on their event codes.
*/
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}
static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
{
if (!(x86_pmu.flags & PMU_FL_PAIR))
return false;
switch (amd_get_event_code(hwc)) {
case 0x003: return true; /* Retired SSE/AVX FLOPs */
default: return false;
}
}
static int amd_core_hw_config(struct perf_event *event)
{
if (event->attr.exclude_host && event->attr.exclude_guest)
@ -316,15 +339,10 @@ static int amd_core_hw_config(struct perf_event *event)
else if (event->attr.exclude_guest)
event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
return 0;
}
if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
event->hw.flags |= PERF_X86_EVENT_PAIR;
/*
* AMD64 events are detected based on their event codes.
*/
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
return 0;
}
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@ -855,6 +873,29 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
}
}
static struct event_constraint pair_constraint;
static struct event_constraint *
amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (amd_is_pair_event_code(hwc))
return &pair_constraint;
return &unconstrained;
}
static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (is_counter_pair(hwc))
--cpuc->n_pair;
}
static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@ -898,33 +939,15 @@ static __initconst const struct x86_pmu amd_pmu = {
static int __init amd_core_pmu_init(void)
{
u64 even_ctr_mask = 0ULL;
int i;
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return 0;
/* Avoid calulating the value each time in the NMI handler */
/* Avoid calculating the value each time in the NMI handler */
perf_nmi_window = msecs_to_jiffies(100);
switch (boot_cpu_data.x86) {
case 0x15:
pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
break;
case 0x17:
pr_cont("Fam17h ");
/*
* In family 17h, there are no event constraints in the PMC hardware.
* We fallback to using default amd_get_event_constraints.
*/
break;
case 0x18:
pr_cont("Fam18h ");
/* Using default amd_get_event_constraints. */
break;
default:
pr_err("core perfctr but no constraints; unknown hardware!\n");
return -ENODEV;
}
/*
* If core performance counter extensions exists, we must use
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
@ -939,6 +962,32 @@ static int __init amd_core_pmu_init(void)
*/
x86_pmu.amd_nb_constraints = 0;
if (boot_cpu_data.x86 == 0x15) {
pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
}
if (boot_cpu_data.x86 >= 0x17) {
pr_cont("Fam17h+ ");
/*
* Family 17h and compatibles have constraints for Large
* Increment per Cycle events: they may only be assigned an
* even numbered counter that has a consecutive adjacent odd
* numbered counter following it.
*/
for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
even_ctr_mask |= 1 << i;
pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
x86_pmu.num_counters / 2, 0,
PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
x86_pmu.flags |= PMU_FL_PAIR;
}
pr_cont("core perfctr, ");
return 0;
}

View File

@ -618,6 +618,7 @@ void x86_pmu_disable_all(void)
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
u64 val;
if (!test_bit(idx, cpuc->active_mask))
@ -627,6 +628,8 @@ void x86_pmu_disable_all(void)
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(x86_pmu_config_addr(idx), val);
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_config_addr(idx + 1), 0);
}
}
@ -699,7 +702,7 @@ struct sched_state {
int counter; /* counter index */
int unassigned; /* number of events to be assigned left */
int nr_gp; /* number of GP counters used */
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
u64 used;
};
/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
@ -756,8 +759,12 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
sched->saved_states--;
sched->state = sched->saved[sched->saved_states];
/* continue with next counter: */
clear_bit(sched->state.counter++, sched->state.used);
/* this assignment didn't work out */
/* XXX broken vs EVENT_PAIR */
sched->state.used &= ~BIT_ULL(sched->state.counter);
/* try the next one */
sched->state.counter++;
return true;
}
@ -782,20 +789,32 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
u64 mask = BIT_ULL(idx);
if (sched->state.used & mask)
continue;
sched->state.used |= mask;
goto done;
}
}
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used)) {
if (sched->state.nr_gp++ >= sched->max_gp)
return false;
u64 mask = BIT_ULL(idx);
goto done;
}
if (c->flags & PERF_X86_EVENT_PAIR)
mask |= mask << 1;
if (sched->state.used & mask)
continue;
if (sched->state.nr_gp++ >= sched->max_gp)
return false;
sched->state.used |= mask;
goto done;
}
return false;
@ -872,12 +891,10 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
u64 used_mask = 0;
/*
* Compute the number of events already present; see x86_pmu_add(),
@ -920,6 +937,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* fastpath, try to reuse previous register
*/
for (i = 0; i < n; i++) {
u64 mask;
hwc = &cpuc->event_list[i]->hw;
c = cpuc->event_constraint[i];
@ -931,11 +950,16 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (!test_bit(hwc->idx, c->idxmsk))
break;
mask = BIT_ULL(hwc->idx);
if (is_counter_pair(hwc))
mask |= mask << 1;
/* not already used */
if (test_bit(hwc->idx, used_mask))
if (used_mask & mask)
break;
__set_bit(hwc->idx, used_mask);
used_mask |= mask;
if (assign)
assign[i] = hwc->idx;
}
@ -958,6 +982,15 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
READ_ONCE(cpuc->excl_cntrs->exclusive_present))
gpmax /= 2;
/*
* Reduce the amount of available counters to allow fitting
* the extra Merge events needed by large increment events.
*/
if (x86_pmu.flags & PMU_FL_PAIR) {
gpmax = x86_pmu.num_counters - cpuc->n_pair;
WARN_ON(gpmax <= 0);
}
unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
wmax, gpmax, assign);
}
@ -1038,6 +1071,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
return -EINVAL;
cpuc->event_list[n] = leader;
n++;
if (is_counter_pair(&leader->hw))
cpuc->n_pair++;
}
if (!dogrp)
return n;
@ -1052,6 +1087,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
cpuc->event_list[n] = event;
n++;
if (is_counter_pair(&event->hw))
cpuc->n_pair++;
}
return n;
}
@ -1237,6 +1274,13 @@ int x86_perf_event_set_period(struct perf_event *event)
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Clear the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width
*/
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_event_addr(idx + 1), 0);
/*
* Due to erratum on certan cpu we need
* a second write to be sure the register

View File

@ -741,6 +741,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, model_skl),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl),
{},
};

View File

@ -77,6 +77,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
struct amd_nb {
int nb_id; /* NorthBridge id */
@ -272,6 +273,7 @@ struct cpu_hw_events {
struct amd_nb *amd_nb;
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
u64 perf_ctr_virt_mask;
int n_pair; /* Large increment events */
void *kfree_on_online[X86_PERF_KFREE_MAX];
};
@ -694,6 +696,7 @@ struct x86_pmu {
* AMD bits
*/
unsigned int amd_nb_constraints : 1;
u64 perf_ctr_pair_en;
/*
* Extra registers for events
@ -743,6 +746,7 @@ do { \
#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
#define PMU_FL_TFA 0x20 /* deal with TSX force abort */
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@ -838,6 +842,11 @@ int x86_pmu_hw_config(struct perf_event *event);
void x86_pmu_disable_all(void);
static inline bool is_counter_pair(struct hw_perf_event *hwc)
{
return hwc->flags & PERF_X86_EVENT_PAIR;
}
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
@ -845,6 +854,14 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
if (hwc->extra_reg.reg)
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
/*
* Add enabled Merge event on next counter
* if large increment event being enabled on this counter
*/
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}
@ -861,6 +878,9 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
wrmsrl(hwc->config_base, hwc->config);
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
}
void x86_pmu_enable_event(struct perf_event *event);

View File

@ -47,8 +47,6 @@ struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
int ftrace_int3_handler(struct pt_regs *regs);
#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
#endif /* CONFIG_DYNAMIC_FTRACE */

View File

@ -11,12 +11,11 @@
#include <asm-generic/kprobes.h>
#define BREAKPOINT_INSTRUCTION 0xcc
#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <asm/text-patching.h>
#include <asm/insn.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
@ -25,10 +24,7 @@ struct pt_regs;
struct kprobe;
typedef u8 kprobe_opcode_t;
#define RELATIVEJUMP_OPCODE 0xe9
#define RELATIVEJUMP_SIZE 5
#define RELATIVECALL_OPCODE 0xe8
#define RELATIVE_ADDR_SIZE 4
#define MAX_STACK_SIZE 64
#define CUR_STACK_SIZE(ADDR) \
(current_top_of_stack() - (unsigned long)(ADDR))
@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[];
extern __visible kprobe_opcode_t optprobe_template_val[];
extern __visible kprobe_opcode_t optprobe_template_call[];
extern __visible kprobe_opcode_t optprobe_template_end[];
#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE)
#define MAX_OPTINSN_SIZE \
(((unsigned long)optprobe_template_end - \
(unsigned long)optprobe_template_entry) + \
MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE)
extern const int kretprobe_blacklist_size;
@ -73,7 +69,7 @@ struct arch_specific_insn {
struct arch_optimized_insn {
/* copy of the original instructions */
kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
kprobe_opcode_t copied_insn[DISP32_SIZE];
/* detour code buffer */
kprobe_opcode_t *insn;
/* the size of instructions copied to detour code buffer */

View File

@ -81,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#ifdef CONFIG_X86_64
static inline int set_mce_nospec(unsigned long pfn)

View File

@ -25,14 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
*/
#define POKE_MAX_OPCODE_SIZE 5
struct text_poke_loc {
void *addr;
int len;
s32 rel32;
u8 opcode;
const u8 text[POKE_MAX_OPCODE_SIZE];
};
extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
@ -50,21 +42,13 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
* an inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
#ifndef CONFIG_UML_X86
static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
{
regs->ip = ip;
}
extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_finish(void);
#define INT3_INSN_SIZE 1
#define INT3_INSN_OPCODE 0xCC
@ -78,6 +62,67 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define JMP8_INSN_SIZE 2
#define JMP8_INSN_OPCODE 0xEB
#define DISP32_SIZE 4
static inline int text_opcode_size(u8 opcode)
{
int size = 0;
#define __CASE(insn) \
case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break
switch(opcode) {
__CASE(INT3);
__CASE(CALL);
__CASE(JMP32);
__CASE(JMP8);
}
#undef __CASE
return size;
}
union text_poke_insn {
u8 text[POKE_MAX_OPCODE_SIZE];
struct {
u8 opcode;
s32 disp;
} __attribute__((packed));
};
static __always_inline
void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
{
static union text_poke_insn insn; /* per instance */
int size = text_opcode_size(opcode);
insn.opcode = opcode;
if (size > 1) {
insn.disp = (long)dest - (long)(addr + size);
if (size == 2) {
/*
* Ensure that for JMP9 the displacement
* actually fits the signed byte.
*/
BUG_ON((insn.disp >> 31) != (insn.disp >> 7));
}
}
return &insn.text;
}
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
#ifndef CONFIG_UML_X86
static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
{
regs->ip = ip;
}
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@ -85,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
* stack where the break point happened, and the saving of
* pt_regs. We can extend the original stack because of
* this gap. See the idtentry macro's create_gap option.
*
* Similarly entry_32.S will have a gap on the stack for (any) hardware
* exception and pt_regs; see FIXUP_FRAME.
*/
regs->sp -= sizeof(unsigned long);
*(unsigned long *)regs->sp = val;

View File

@ -936,44 +936,81 @@ static void do_sync_core(void *info)
sync_core();
}
static struct bp_patching_desc {
void text_poke_sync(void)
{
on_each_cpu(do_sync_core, NULL, 1);
}
struct text_poke_loc {
s32 rel_addr; /* addr := _stext + rel_addr */
s32 rel32;
u8 opcode;
const u8 text[POKE_MAX_OPCODE_SIZE];
};
struct bp_patching_desc {
struct text_poke_loc *vec;
int nr_entries;
} bp_patching;
atomic_t refs;
};
static int patch_cmp(const void *key, const void *elt)
static struct bp_patching_desc *bp_desc;
static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
{
struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */
if (!desc || !atomic_inc_not_zero(&desc->refs))
return NULL;
return desc;
}
static inline void put_desc(struct bp_patching_desc *desc)
{
smp_mb__before_atomic();
atomic_dec(&desc->refs);
}
static inline void *text_poke_addr(struct text_poke_loc *tp)
{
return _stext + tp->rel_addr;
}
static int notrace patch_cmp(const void *key, const void *elt)
{
struct text_poke_loc *tp = (struct text_poke_loc *) elt;
if (key < tp->addr)
if (key < text_poke_addr(tp))
return -1;
if (key > tp->addr)
if (key > text_poke_addr(tp))
return 1;
return 0;
}
NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
int notrace poke_int3_handler(struct pt_regs *regs)
{
struct bp_patching_desc *desc;
struct text_poke_loc *tp;
int len, ret = 0;
void *ip;
if (user_mode(regs))
return 0;
/*
* Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries.
* bp_desc:
*
* nr_entries != 0 INT3
* bp_desc = desc INT3
* WMB RMB
* write INT3 if (nr_entries)
*
* Idem for other elements in bp_patching.
* write INT3 if (desc)
*/
smp_rmb();
if (likely(!bp_patching.nr_entries))
return 0;
if (user_mode(regs))
desc = try_get_desc(&bp_desc);
if (!desc)
return 0;
/*
@ -984,19 +1021,20 @@ int poke_int3_handler(struct pt_regs *regs)
/*
* Skip the binary search if there is a single member in the vector.
*/
if (unlikely(bp_patching.nr_entries > 1)) {
tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
if (unlikely(desc->nr_entries > 1)) {
tp = bsearch(ip, desc->vec, desc->nr_entries,
sizeof(struct text_poke_loc),
patch_cmp);
if (!tp)
return 0;
goto out_put;
} else {
tp = bp_patching.vec;
if (tp->addr != ip)
return 0;
tp = desc->vec;
if (text_poke_addr(tp) != ip)
goto out_put;
}
ip += tp->len;
len = text_opcode_size(tp->opcode);
ip += len;
switch (tp->opcode) {
case INT3_INSN_OPCODE:
@ -1004,7 +1042,7 @@ int poke_int3_handler(struct pt_regs *regs)
* Someone poked an explicit INT3, they'll want to handle it,
* do not consume.
*/
return 0;
goto out_put;
case CALL_INSN_OPCODE:
int3_emulate_call(regs, (long)ip + tp->rel32);
@ -1019,10 +1057,18 @@ int poke_int3_handler(struct pt_regs *regs)
BUG();
}
return 1;
ret = 1;
out_put:
put_desc(desc);
return ret;
}
NOKPROBE_SYMBOL(poke_int3_handler);
#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
static struct text_poke_loc tp_vec[TP_VEC_MAX];
static int tp_vec_nr;
/**
* text_poke_bp_batch() -- update instructions on live kernel on SMP
* @tp: vector of instructions to patch
@ -1044,16 +1090,20 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* replacing opcode
* - sync cores
*/
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
struct bp_patching_desc desc = {
.vec = tp,
.nr_entries = nr_entries,
.refs = ATOMIC_INIT(1),
};
unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
int do_sync;
lockdep_assert_held(&text_mutex);
bp_patching.vec = tp;
bp_patching.nr_entries = nr_entries;
smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
/*
* Corresponding read barrier in int3 notifier for making sure the
@ -1065,18 +1115,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* First step: add a int3 trap to the address that will be patched.
*/
for (i = 0; i < nr_entries; i++)
text_poke(tp[i].addr, &int3, sizeof(int3));
text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
on_each_cpu(do_sync_core, NULL, 1);
text_poke_sync();
/*
* Second step: update all but the first byte of the patched range.
*/
for (do_sync = 0, i = 0; i < nr_entries; i++) {
if (tp[i].len - sizeof(int3) > 0) {
text_poke((char *)tp[i].addr + sizeof(int3),
(const char *)tp[i].text + sizeof(int3),
tp[i].len - sizeof(int3));
int len = text_opcode_size(tp[i].opcode);
if (len - INT3_INSN_SIZE > 0) {
text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
(const char *)tp[i].text + INT3_INSN_SIZE,
len - INT3_INSN_SIZE);
do_sync++;
}
}
@ -1087,7 +1139,7 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* not necessary and we'd be safe even without it. But
* better safe than sorry (plus there's not only Intel).
*/
on_each_cpu(do_sync_core, NULL, 1);
text_poke_sync();
}
/*
@ -1098,19 +1150,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
if (tp[i].text[0] == INT3_INSN_OPCODE)
continue;
text_poke(tp[i].addr, tp[i].text, sizeof(int3));
text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
do_sync++;
}
if (do_sync)
on_each_cpu(do_sync_core, NULL, 1);
text_poke_sync();
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
* Remove and synchronize_rcu(), except we have a very primitive
* refcount based completion.
*/
bp_patching.vec = NULL;
bp_patching.nr_entries = 0;
WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
if (!atomic_dec_and_test(&desc.refs))
atomic_cond_read_acquire(&desc.refs, !VAL);
}
void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
@ -1118,11 +1171,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
{
struct insn insn;
if (!opcode)
opcode = (void *)tp->text;
else
memcpy((void *)tp->text, opcode, len);
memcpy((void *)tp->text, opcode, len);
if (!emulate)
emulate = opcode;
@ -1132,8 +1181,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
BUG_ON(!insn_complete(&insn));
BUG_ON(len != insn.length);
tp->addr = addr;
tp->len = len;
tp->rel_addr = addr - (void *)_stext;
tp->opcode = insn.opcode.bytes[0];
switch (tp->opcode) {
@ -1167,6 +1215,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
}
}
/*
* We hard rely on the tp_vec being ordered; ensure this is so by flushing
* early if needed.
*/
static bool tp_order_fail(void *addr)
{
struct text_poke_loc *tp;
if (!tp_vec_nr)
return false;
if (!addr) /* force */
return true;
tp = &tp_vec[tp_vec_nr - 1];
if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
return true;
return false;
}
static void text_poke_flush(void *addr)
{
if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
text_poke_bp_batch(tp_vec, tp_vec_nr);
tp_vec_nr = 0;
}
}
void text_poke_finish(void)
{
text_poke_flush(NULL);
}
void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
{
struct text_poke_loc *tp;
if (unlikely(system_state == SYSTEM_BOOTING)) {
text_poke_early(addr, opcode, len);
return;
}
text_poke_flush(addr);
tp = &tp_vec[tp_vec_nr++];
text_poke_loc_init(tp, addr, opcode, len, emulate);
}
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@ -1178,10 +1275,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
struct text_poke_loc tp;
if (unlikely(system_state == SYSTEM_BOOTING)) {
text_poke_early(addr, opcode, len);
return;
}
text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}

View File

@ -35,6 +35,8 @@
#ifdef CONFIG_DYNAMIC_FTRACE
static int ftrace_poke_late = 0;
int ftrace_arch_code_modify_prepare(void)
__acquires(&text_mutex)
{
@ -44,84 +46,37 @@ int ftrace_arch_code_modify_prepare(void)
* ftrace has it set to "read/write".
*/
mutex_lock(&text_mutex);
set_kernel_text_rw();
set_all_modules_text_rw();
ftrace_poke_late = 1;
return 0;
}
int ftrace_arch_code_modify_post_process(void)
__releases(&text_mutex)
{
set_all_modules_text_ro();
set_kernel_text_ro();
/*
* ftrace_make_{call,nop}() may be called during
* module load, and we need to finish the text_poke_queue()
* that they do, here.
*/
text_poke_finish();
ftrace_poke_late = 0;
mutex_unlock(&text_mutex);
return 0;
}
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
struct {
unsigned char op;
int offset;
} __attribute__((packed));
};
static int ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
static unsigned char *
ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
calc.op = op;
calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
return calc.code;
}
static unsigned char *
ftrace_call_replace(unsigned long ip, unsigned long addr)
{
return ftrace_text_replace(0xe8, ip, addr);
}
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
return addr >= start && addr < end;
}
static unsigned long text_ip_addr(unsigned long ip)
{
/*
* On x86_64, kernel text mappings are mapped read-only, so we use
* the kernel identity mapping instead of the kernel text mapping
* to modify the kernel text.
*
* For 32bit kernels, these mappings are same and we can use
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa_symbol(ip));
return ip;
}
static const unsigned char *ftrace_nop_replace(void)
static const char *ftrace_nop_replace(void)
{
return ideal_nops[NOP_ATOMIC5];
}
static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code)
static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}
ftrace_expected = old_code;
static int ftrace_verify_code(unsigned long ip, const char *old_code)
{
char cur_code[MCOUNT_INSN_SIZE];
/*
* Note:
@ -130,31 +85,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
* Carefully read and modify the code with probe_kernel_*(), and make
* sure what we read is what we expected it to be before modifying it.
*/
/* read the text we want to modify */
if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
WARN_ON(1);
return -EFAULT;
}
/* Make sure it is what we expect it to be */
if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
WARN_ON(1);
return -EINVAL;
ip = text_ip_addr(ip);
/* replace the text with the new text */
if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
return -EPERM;
sync_core();
}
return 0;
}
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
/*
* Marked __ref because it calls text_poke_early() which is .init.text. That is
* ok because that call will happen early, during boot, when .init sections are
* still present.
*/
static int __ref
ftrace_modify_code_direct(unsigned long ip, const char *old_code,
const char *new_code)
{
int ret = ftrace_verify_code(ip, old_code);
if (ret)
return ret;
/* replace the text with the new text */
if (ftrace_poke_late)
text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
else
text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
return 0;
}
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
unsigned const char *new, *old;
unsigned long ip = rec->ip;
const char *new, *old;
old = ftrace_call_replace(ip, addr);
new = ftrace_nop_replace();
@ -168,19 +138,20 @@ int ftrace_make_nop(struct module *mod,
* just modify the code directly.
*/
if (addr == MCOUNT_ADDR)
return ftrace_modify_code_direct(rec->ip, old, new);
return ftrace_modify_code_direct(ip, old, new);
ftrace_expected = NULL;
/* Normal cases use add_brk_on_nop */
/*
* x86 overrides ftrace_replace_code -- this function will never be used
* in this case.
*/
WARN_ONCE(1, "invalid use of ftrace_make_nop");
return -EINVAL;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned const char *new, *old;
unsigned long ip = rec->ip;
const char *new, *old;
old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr);
@ -189,43 +160,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_modify_code_direct(rec->ip, old, new);
}
/*
* The modifying_ftrace_code is used to tell the breakpoint
* handler to call ftrace_int3_handler(). If it fails to
* call this handler for a breakpoint added by ftrace, then
* the kernel may crash.
*
* As atomic_writes on x86 do not need a barrier, we do not
* need to add smp_mb()s for this to work. It is also considered
* that we can not read the modifying_ftrace_code before
* executing the breakpoint. That would be quite remarkable if
* it could do that. Here's the flow that is required:
*
* CPU-0 CPU-1
*
* atomic_inc(mfc);
* write int3s
* <trap-int3> // implicit (r)mb
* if (atomic_read(mfc))
* call ftrace_int3_handler()
*
* Then when we are finished:
*
* atomic_dec(mfc);
*
* If we hit a breakpoint that was not set by ftrace, it does not
* matter if ftrace_int3_handler() is called or not. It will
* simply be ignored. But it is crucial that a ftrace nop/caller
* breakpoint is handled. No other user should ever place a
* breakpoint on an ftrace nop/caller location. It must only
* be done by this code.
*/
atomic_t modifying_ftrace_code __read_mostly;
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code);
/*
* Should never be called:
* As it is only called by __ftrace_replace_code() which is called by
@ -238,452 +172,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
WARN_ON(1);
ftrace_expected = NULL;
return -EINVAL;
}
static unsigned long ftrace_update_func;
static unsigned long ftrace_update_func_call;
static int update_ftrace_func(unsigned long ip, void *new)
{
unsigned char old[MCOUNT_INSN_SIZE];
int ret;
memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
ftrace_update_func = ip;
/* Make sure the breakpoints see the ftrace_update_func update */
smp_wmb();
/* See comment above by declaration of modifying_ftrace_code */
atomic_inc(&modifying_ftrace_code);
ret = ftrace_modify_code(ip, old, new);
atomic_dec(&modifying_ftrace_code);
return ret;
}
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
unsigned char *new;
int ret;
ftrace_update_func_call = (unsigned long)func;
new = ftrace_call_replace(ip, (unsigned long)func);
ret = update_ftrace_func(ip, new);
/* Also update the regs callback function */
if (!ret) {
ip = (unsigned long)(&ftrace_regs_call);
new = ftrace_call_replace(ip, (unsigned long)func);
ret = update_ftrace_func(ip, new);
}
return ret;
}
static nokprobe_inline int is_ftrace_caller(unsigned long ip)
{
if (ip == ftrace_update_func)
return 1;
return 0;
}
/*
* A breakpoint was added to the code address we are about to
* modify, and this is the handle that will just skip over it.
* We are either changing a nop into a trace call, or a trace
* call to a nop. While the change is taking place, we treat
* it just like it was a nop.
*/
int ftrace_int3_handler(struct pt_regs *regs)
{
unsigned long ip;
const char *new;
if (WARN_ON_ONCE(!regs))
return 0;
ip = (unsigned long)(&ftrace_call);
new = ftrace_call_replace(ip, (unsigned long)func);
text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
ip = regs->ip - INT3_INSN_SIZE;
if (ftrace_location(ip)) {
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
return 1;
} else if (is_ftrace_caller(ip)) {
if (!ftrace_update_func_call) {
int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
return 1;
}
int3_emulate_call(regs, ftrace_update_func_call);
return 1;
}
ip = (unsigned long)(&ftrace_regs_call);
new = ftrace_call_replace(ip, (unsigned long)func);
text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
return 0;
}
NOKPROBE_SYMBOL(ftrace_int3_handler);
static int ftrace_write(unsigned long ip, const char *val, int size)
{
ip = text_ip_addr(ip);
if (probe_kernel_write((void *)ip, val, size))
return -EPERM;
return 0;
}
static int add_break(unsigned long ip, const char *old)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
unsigned char brk = BREAKPOINT_INSTRUCTION;
if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
ftrace_expected = old;
/* Make sure it is what we expect it to be */
if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
return -EINVAL;
return ftrace_write(ip, &brk, 1);
}
static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned const char *old;
unsigned long ip = rec->ip;
old = ftrace_call_replace(ip, addr);
return add_break(rec->ip, old);
}
static int add_brk_on_nop(struct dyn_ftrace *rec)
{
unsigned const char *old;
old = ftrace_nop_replace();
return add_break(rec->ip, old);
}
static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
ftrace_addr = ftrace_get_addr_curr(rec);
ret = ftrace_test_record(rec, enable);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return add_brk_on_nop(rec);
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_NOP:
/* converting a call to a nop */
return add_brk_on_call(rec, ftrace_addr);
}
return 0;
}
/*
* On error, we need to remove breakpoints. This needs to
* be done caefully. If the address does not currently have a
* breakpoint, we know we are done. Otherwise, we look at the
* remaining 4 bytes of the instruction. If it matches a nop
* we replace the breakpoint with the nop. Otherwise we replace
* it with the call instruction.
*/
static int remove_breakpoint(struct dyn_ftrace *rec)
{
unsigned char ins[MCOUNT_INSN_SIZE];
unsigned char brk = BREAKPOINT_INSTRUCTION;
const unsigned char *nop;
unsigned long ftrace_addr;
unsigned long ip = rec->ip;
/* If we fail the read, just give up */
if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
/* If this does not have a breakpoint, we are done */
if (ins[0] != brk)
return 0;
nop = ftrace_nop_replace();
/*
* If the last 4 bytes of the instruction do not match
* a nop, then we assume that this is a call to ftrace_addr.
*/
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
/*
* For extra paranoidism, we check if the breakpoint is on
* a call that would actually jump to the ftrace_addr.
* If not, don't touch the breakpoint, we make just create
* a disaster.
*/
ftrace_addr = ftrace_get_addr_new(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
goto update;
/* Check both ftrace_addr and ftrace_old_addr */
ftrace_addr = ftrace_get_addr_curr(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
ftrace_expected = nop;
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
return -EINVAL;
}
update:
return ftrace_write(ip, nop, 1);
}
static int add_update_code(unsigned long ip, unsigned const char *new)
{
/* skip breakpoint */
ip++;
new++;
return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}
static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
unsigned const char *new;
new = ftrace_call_replace(ip, addr);
return add_update_code(ip, new);
}
static int add_update_nop(struct dyn_ftrace *rec)
{
unsigned long ip = rec->ip;
unsigned const char *new;
new = ftrace_nop_replace();
return add_update_code(ip, new);
}
static int add_update(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
ret = ftrace_test_record(rec, enable);
ftrace_addr = ftrace_get_addr_new(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return add_update_call(rec, ftrace_addr);
case FTRACE_UPDATE_MAKE_NOP:
/* converting a call to a nop */
return add_update_nop(rec);
}
return 0;
}
static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
unsigned const char *new;
new = ftrace_call_replace(ip, addr);
return ftrace_write(ip, new, 1);
}
static int finish_update_nop(struct dyn_ftrace *rec)
{
unsigned long ip = rec->ip;
unsigned const char *new;
new = ftrace_nop_replace();
return ftrace_write(ip, new, 1);
}
static int finish_update(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
ret = ftrace_update_record(rec, enable);
ftrace_addr = ftrace_get_addr_new(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return finish_update_call(rec, ftrace_addr);
case FTRACE_UPDATE_MAKE_NOP:
/* converting a call to a nop */
return finish_update_nop(rec);
}
return 0;
}
static void do_sync_core(void *data)
{
sync_core();
}
static void run_sync(void)
{
int enable_irqs;
/* No need to sync if there's only one CPU */
if (num_online_cpus() == 1)
return;
enable_irqs = irqs_disabled();
/* We may be called with interrupts disabled (on bootup). */
if (enable_irqs)
local_irq_enable();
on_each_cpu(do_sync_core, NULL, 1);
if (enable_irqs)
local_irq_disable();
}
void ftrace_replace_code(int enable)
{
struct ftrace_rec_iter *iter;
struct dyn_ftrace *rec;
const char *report = "adding breakpoints";
int count = 0;
const char *new, *old;
int ret;
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
ret = add_breakpoints(rec, enable);
if (ret)
goto remove_breakpoints;
count++;
switch (ftrace_test_record(rec, enable)) {
case FTRACE_UPDATE_IGNORE:
default:
continue;
case FTRACE_UPDATE_MAKE_CALL:
old = ftrace_nop_replace();
break;
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_NOP:
old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
break;
}
ret = ftrace_verify_code(rec->ip, old);
if (ret) {
ftrace_bug(ret, rec);
return;
}
}
run_sync();
report = "updating code";
count = 0;
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
ret = add_update(rec, enable);
if (ret)
goto remove_breakpoints;
count++;
switch (ftrace_test_record(rec, enable)) {
case FTRACE_UPDATE_IGNORE:
default:
continue;
case FTRACE_UPDATE_MAKE_CALL:
case FTRACE_UPDATE_MODIFY_CALL:
new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
break;
case FTRACE_UPDATE_MAKE_NOP:
new = ftrace_nop_replace();
break;
}
text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
ftrace_update_record(rec, enable);
}
run_sync();
report = "removing breakpoints";
count = 0;
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
ret = finish_update(rec, enable);
if (ret)
goto remove_breakpoints;
count++;
}
run_sync();
return;
remove_breakpoints:
pr_warn("Failed on %s (%d):\n", report, count);
ftrace_bug(ret, rec);
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
/*
* Breakpoints are handled only when this function is in
* progress. The system could not work with them.
*/
if (remove_breakpoint(rec))
BUG();
}
run_sync();
}
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code)
{
int ret;
ret = add_break(ip, old_code);
if (ret)
goto out;
run_sync();
ret = add_update_code(ip, new_code);
if (ret)
goto fail_update;
run_sync();
ret = ftrace_write(ip, new_code, 1);
/*
* The breakpoint is handled only when this function is in progress.
* The system could not work if we could not remove it.
*/
BUG_ON(ret);
out:
run_sync();
return ret;
fail_update:
/* Also here the system could not work with the breakpoint */
if (ftrace_write(ip, old_code, 1))
BUG();
goto out;
text_poke_finish();
}
void arch_ftrace_update_code(int command)
{
/* See comment above by declaration of modifying_ftrace_code */
atomic_inc(&modifying_ftrace_code);
ftrace_modify_all_code(command);
atomic_dec(&modifying_ftrace_code);
}
int __init ftrace_dyn_arch_init(void)
@ -748,6 +314,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long start_offset;
unsigned long end_offset;
unsigned long op_offset;
unsigned long call_offset;
unsigned long offset;
unsigned long npages;
unsigned long size;
@ -764,10 +331,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
start_offset = (unsigned long)ftrace_regs_caller;
end_offset = (unsigned long)ftrace_regs_caller_end;
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
call_offset = (unsigned long)ftrace_call;
}
size = end_offset - start_offset;
@ -824,16 +393,21 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* put in the new offset to the ftrace_ops */
memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
/* put in the call to the function */
mutex_lock(&text_mutex);
call_offset -= start_offset;
memcpy(trampoline + call_offset,
text_gen_insn(CALL_INSN_OPCODE,
trampoline + call_offset,
ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
set_vm_flush_reset_perms(trampoline);
/*
* Module allocation needs to be completed by making the page
* executable. The page is still writable, which is a security hazard,
* but anyhow ftrace breaks W^X completely.
*/
set_memory_ro((unsigned long)trampoline, npages);
set_memory_x((unsigned long)trampoline, npages);
return (unsigned long)trampoline;
fail:
@ -860,62 +434,54 @@ static unsigned long calc_trampoline_call_offset(bool save_regs)
void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
ftrace_func_t func;
unsigned char *new;
unsigned long offset;
unsigned long ip;
unsigned int size;
int ret, npages;
const char *new;
if (ops->trampoline) {
/*
* The ftrace_ops caller may set up its own trampoline.
* In such a case, this code must not modify it.
*/
if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
return;
npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
set_memory_rw(ops->trampoline, npages);
} else {
if (!ops->trampoline) {
ops->trampoline = create_trampoline(ops, &size);
if (!ops->trampoline)
return;
ops->trampoline_size = size;
npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
return;
}
/*
* The ftrace_ops caller may set up its own trampoline.
* In such a case, this code must not modify it.
*/
if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
return;
offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
ip = ops->trampoline + offset;
func = ftrace_ops_get_func(ops);
ftrace_update_func_call = (unsigned long)func;
mutex_lock(&text_mutex);
/* Do a safe modify in case the trampoline is executing */
new = ftrace_call_replace(ip, (unsigned long)func);
ret = update_ftrace_func(ip, new);
set_memory_ro(ops->trampoline, npages);
/* The update should never fail */
WARN_ON(ret);
text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
mutex_unlock(&text_mutex);
}
/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
union ftrace_code_union calc;
union text_poke_insn call;
int ret;
ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE);
if (WARN_ON_ONCE(ret < 0))
return NULL;
/* Make sure this is a call */
if (WARN_ON_ONCE(calc.op != 0xe8)) {
pr_warn("Expected e8, got %x\n", calc.op);
if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) {
pr_warn("Expected E8, got %x\n", call.opcode);
return NULL;
}
return ptr + MCOUNT_INSN_SIZE + calc.offset;
return ptr + CALL_INSN_SIZE + call.disp;
}
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
@ -982,19 +548,18 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
return ftrace_text_replace(0xe9, ip, addr);
return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
}
static int ftrace_mod_jmp(unsigned long ip, void *func)
{
unsigned char *new;
const char *new;
ftrace_update_func_call = 0UL;
new = ftrace_jmp_replace(ip, (unsigned long)func);
return update_ftrace_func(ip, new);
text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
return 0;
}
int ftrace_enable_ftrace_graph_caller(void)
@ -1020,10 +585,9 @@ int ftrace_disable_ftrace_graph_caller(void)
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
int faulted;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
/*
* When resuming from suspend-to-ram, this function can be indirectly

View File

@ -16,15 +16,7 @@
#include <asm/alternative.h>
#include <asm/text-patching.h>
union jump_code_union {
char code[JUMP_LABEL_NOP_SIZE];
struct {
char jump;
int offset;
} __attribute__((packed));
};
static void bug_at(unsigned char *ip, int line)
static void bug_at(const void *ip, int line)
{
/*
* The location is not an op that we were expecting.
@ -35,42 +27,42 @@ static void bug_at(unsigned char *ip, int line)
BUG();
}
static void __jump_label_set_jump_code(struct jump_entry *entry,
enum jump_label_type type,
union jump_code_union *code,
int init)
static const void *
__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
{
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
const void *expect;
const void *expect, *code;
const void *addr, *dest;
int line;
code->jump = 0xe9;
code->offset = jump_entry_target(entry) -
(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
addr = (void *)jump_entry_code(entry);
dest = (void *)jump_entry_target(entry);
code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
if (init) {
expect = default_nop; line = __LINE__;
} else if (type == JUMP_LABEL_JMP) {
expect = ideal_nop; line = __LINE__;
} else {
expect = code->code; line = __LINE__;
expect = code; line = __LINE__;
}
if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
bug_at((void *)jump_entry_code(entry), line);
if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
bug_at(addr, line);
if (type == JUMP_LABEL_NOP)
memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
code = ideal_nop;
return code;
}
static void __ref __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
static void inline __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
union jump_code_union code;
__jump_label_set_jump_code(entry, type, &code, init);
const void *opcode = __jump_label_set_jump_code(entry, type, init);
/*
* As long as only a single processor is running and the code is still
@ -84,31 +76,33 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
* always nop being the 'currently valid' instruction
*/
if (init || system_state == SYSTEM_BOOTING) {
text_poke_early((void *)jump_entry_code(entry), &code,
text_poke_early((void *)jump_entry_code(entry), opcode,
JUMP_LABEL_NOP_SIZE);
return;
}
text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
}
static void __ref jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
mutex_lock(&text_mutex);
__jump_label_transform(entry, type, init);
mutex_unlock(&text_mutex);
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
mutex_lock(&text_mutex);
__jump_label_transform(entry, type, 0);
mutex_unlock(&text_mutex);
jump_label_transform(entry, type, 0);
}
#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
static struct text_poke_loc tp_vec[TP_VEC_MAX];
static int tp_vec_nr;
bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type)
{
struct text_poke_loc *tp;
void *entry_code;
const void *opcode;
if (system_state == SYSTEM_BOOTING) {
/*
@ -118,53 +112,19 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
return true;
}
/*
* No more space in the vector, tell upper layer to apply
* the queue before continuing.
*/
if (tp_vec_nr == TP_VEC_MAX)
return false;
tp = &tp_vec[tp_vec_nr];
entry_code = (void *)jump_entry_code(entry);
/*
* The INT3 handler will do a bsearch in the queue, so we need entries
* to be sorted. We can survive an unsorted list by rejecting the entry,
* forcing the generic jump_label code to apply the queue. Warning once,
* to raise the attention to the case of an unsorted entry that is
* better not happen, because, in the worst case we will perform in the
* same way as we do without batching - with some more overhead.
*/
if (tp_vec_nr > 0) {
int prev = tp_vec_nr - 1;
struct text_poke_loc *prev_tp = &tp_vec[prev];
if (WARN_ON_ONCE(prev_tp->addr > entry_code))
return false;
}
__jump_label_set_jump_code(entry, type,
(union jump_code_union *)&tp->text, 0);
text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
tp_vec_nr++;
mutex_lock(&text_mutex);
opcode = __jump_label_set_jump_code(entry, type, 0);
text_poke_queue((void *)jump_entry_code(entry),
opcode, JUMP_LABEL_NOP_SIZE, NULL);
mutex_unlock(&text_mutex);
return true;
}
void arch_jump_label_transform_apply(void)
{
if (!tp_vec_nr)
return;
mutex_lock(&text_mutex);
text_poke_bp_batch(tp_vec, tp_vec_nr);
text_poke_finish();
mutex_unlock(&text_mutex);
tp_vec_nr = 0;
}
static enum {
@ -193,5 +153,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
jlstate = JL_STATE_NO_UPDATE;
}
if (jlstate == JL_STATE_UPDATE)
__jump_label_transform(entry, type, 1);
jump_label_transform(entry, type, 1);
}

View File

@ -120,14 +120,14 @@ __synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
void synthesize_reljump(void *dest, void *from, void *to)
{
__synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);
__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_reljump);
/* Insert a call instruction at address 'from', which calls address 'to'.*/
void synthesize_relcall(void *dest, void *from, void *to)
{
__synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);
__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_relcall);
@ -302,7 +302,7 @@ static int can_probe(unsigned long paddr)
* Another debugging subsystem might insert this breakpoint.
* In that case, we can't recover it.
*/
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
return 0;
addr += insn.length;
}
@ -357,7 +357,7 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
return 0;
/* Another subsystem puts a breakpoint, failed to recover */
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
return 0;
/* We should not singlestep on the exception masking instructions */
@ -401,14 +401,14 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
int len = insn->length;
if (can_boost(insn, p->addr) &&
MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {
MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
/*
* These instructions can be executed directly if it
* jumps back to correct address.
*/
synthesize_reljump(buf + len, p->ainsn.insn + len,
p->addr + insn->length);
len += RELATIVEJUMP_SIZE;
len += JMP32_INSN_SIZE;
p->ainsn.boostable = true;
} else {
p->ainsn.boostable = false;
@ -502,12 +502,14 @@ int arch_prepare_kprobe(struct kprobe *p)
void arch_arm_kprobe(struct kprobe *p)
{
text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
text_poke_sync();
}
void arch_disarm_kprobe(struct kprobe *p)
{
text_poke(p->addr, &p->opcode, 1);
text_poke_sync();
}
void arch_remove_kprobe(struct kprobe *p)
@ -610,7 +612,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
regs->flags |= X86_EFLAGS_TF;
regs->flags &= ~X86_EFLAGS_IF;
/* single step inline if the instruction is an int3 */
if (p->opcode == BREAKPOINT_INSTRUCTION)
if (p->opcode == INT3_INSN_OPCODE)
regs->ip = (unsigned long)p->addr;
else
regs->ip = (unsigned long)p->ainsn.insn;
@ -696,7 +698,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
reset_current_kprobe();
return 1;
}
} else if (*addr != BREAKPOINT_INSTRUCTION) {
} else if (*addr != INT3_INSN_OPCODE) {
/*
* The breakpoint instruction was removed right
* after we hit it. Another cpu has removed

View File

@ -38,7 +38,7 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
long offs;
int i;
for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
for (i = 0; i < JMP32_INSN_SIZE; i++) {
kp = get_kprobe((void *)addr - i);
/* This function only handles jump-optimized kprobe */
if (kp && kprobe_optimized(kp)) {
@ -62,10 +62,10 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
if (addr == (unsigned long)kp->addr) {
buf[0] = kp->opcode;
memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
} else {
offs = addr - (unsigned long)kp->addr - 1;
memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
}
return (unsigned long)buf;
@ -141,8 +141,6 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func);
#define TMPL_END_IDX \
((long)optprobe_template_end - (long)optprobe_template_entry)
#define INT3_SIZE sizeof(kprobe_opcode_t)
/* Optimized kprobe call back function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
@ -162,7 +160,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
regs->cs |= get_kernel_rpl();
regs->gs = 0;
#endif
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
regs->orig_ax = ~0UL;
__this_cpu_write(current_kprobe, &op->kp);
@ -179,7 +177,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
struct insn insn;
int len = 0, ret;
while (len < RELATIVEJUMP_SIZE) {
while (len < JMP32_INSN_SIZE) {
ret = __copy_instruction(dest + len, src + len, real + len, &insn);
if (!ret || !can_boost(&insn, src + len))
return -EINVAL;
@ -271,7 +269,7 @@ static int can_optimize(unsigned long paddr)
return 0;
/* Check there is enough space for a relative jump. */
if (size - offset < RELATIVEJUMP_SIZE)
if (size - offset < JMP32_INSN_SIZE)
return 0;
/* Decode instructions */
@ -290,15 +288,15 @@ static int can_optimize(unsigned long paddr)
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint */
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
return 0;
/* Recover address */
insn.kaddr = (void *)addr;
insn.next_byte = (void *)(addr + insn.length);
/* Check any instructions don't jump into target */
if (insn_is_indirect_jump(&insn) ||
insn_jump_into_range(&insn, paddr + INT3_SIZE,
RELATIVE_ADDR_SIZE))
insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
DISP32_SIZE))
return 0;
addr += insn.length;
}
@ -374,7 +372,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
* Verify if the address gap is in 2GB range, because this uses
* a relative jump.
*/
rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
if (abs(rel) > 0x7fffffff) {
ret = -ERANGE;
goto err;
@ -401,7 +399,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
/* Set returning jmp instruction at the tail of out-of-line buffer */
synthesize_reljump(buf + len, slot + len,
(u8 *)op->kp.addr + op->optinsn.size);
len += RELATIVEJUMP_SIZE;
len += JMP32_INSN_SIZE;
/* We have to use text_poke() for instruction buffer because it is RO */
text_poke(slot, buf, len);
@ -416,49 +414,50 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
}
/*
* Replace breakpoints (int3) with relative jumps.
* Replace breakpoints (INT3) with relative jumps (JMP.d32).
* Caller must call with locking kprobe_mutex and text_mutex.
*
* The caller will have installed a regular kprobe and after that issued
* syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in
* the 4 bytes after the INT3 are unused and can now be overwritten.
*/
void arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
u8 insn_buff[RELATIVEJUMP_SIZE];
u8 insn_buff[JMP32_INSN_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
s32 rel = (s32)((long)op->optinsn.insn -
((long)op->kp.addr + RELATIVEJUMP_SIZE));
((long)op->kp.addr + JMP32_INSN_SIZE));
WARN_ON(kprobe_disabled(&op->kp));
/* Backup instructions which will be replaced by jump address */
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
DISP32_SIZE);
insn_buff[0] = RELATIVEJUMP_OPCODE;
insn_buff[0] = JMP32_INSN_OPCODE;
*(s32 *)(&insn_buff[1]) = rel;
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
list_del_init(&op->list);
}
}
/* Replace a relative jump with a breakpoint (int3). */
/*
* Replace a relative jump (JMP.d32) with a breakpoint (INT3).
*
* After that, we can restore the 4 bytes after the INT3 to undo what
* arch_optimize_kprobes() scribbled. This is safe since those bytes will be
* unused once the INT3 lands.
*/
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
u8 insn_buff[RELATIVEJUMP_SIZE];
u8 emulate_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
insn_buff[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
emulate_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
((long)op->kp.addr + RELATIVEJUMP_SIZE));
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
emulate_buff);
arch_arm_kprobe(&op->kp);
text_poke(op->kp.addr + INT3_INSN_SIZE,
op->optinsn.copied_insn, DISP32_SIZE);
text_poke_sync();
}
/*

View File

@ -572,15 +572,6 @@ NOKPROBE_SYMBOL(do_general_protection);
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_DYNAMIC_FTRACE
/*
* ftrace must be first, everything else may cause a recursive crash.
* See note by declaration of modifying_ftrace_code in ftrace.c
*/
if (unlikely(atomic_read(&modifying_ftrace_code)) &&
ftrace_int3_handler(regs))
return;
#endif
if (poke_int3_handler(regs))
return;

View File

@ -873,34 +873,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
if (!kernel_set_to_readonly)
return;
pr_debug("Set kernel text: %lx - %lx for read write\n",
start, start+size);
set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
}
void set_kernel_text_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
if (!kernel_set_to_readonly)
return;
pr_debug("Set kernel text: %lx - %lx for read only\n",
start, start+size);
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
}
static void mark_nxdata_nx(void)
{
/*

View File

@ -1258,42 +1258,6 @@ void __init mem_init(void)
int kernel_set_to_readonly;
void set_kernel_text_rw(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long end = PFN_ALIGN(_etext);
if (!kernel_set_to_readonly)
return;
pr_debug("Set kernel text: %lx - %lx for read write\n",
start, end);
/*
* Make the kernel identity mapping for text RW. Kernel text
* mapping will always be RO. Refer to the comment in
* static_protections() in pageattr.c
*/
set_memory_rw(start, (end - start) >> PAGE_SHIFT);
}
void set_kernel_text_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long end = PFN_ALIGN(_etext);
if (!kernel_set_to_readonly)
return;
pr_debug("Set kernel text: %lx - %lx for read only\n",
start, end);
/*
* Set the kernel identity mapping for text RO.
*/
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
}
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_text);

View File

@ -138,10 +138,10 @@ TRACE_EVENT(/* put_tid */
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd)
__field(unsigned long, pa);
__field(u32, index);
__field(u32, type);
__field(u16, order);
__field(unsigned long, pa)
__field(u32, index)
__field(u32, type)
__field(u16, order)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd);

View File

@ -588,7 +588,7 @@ TRACE_EVENT(hfi1_sdma_user_reqinfo,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
TP_ARGS(dd, ctxt, subctxt, i),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd);
DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u8, subctxt)
__field(u8, ver_opcode)

View File

@ -46,7 +46,7 @@ TRACE_EVENT(pblk_chunk_reset,
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state);
__field(int, state)
),
TP_fast_assign(
@ -72,7 +72,7 @@ TRACE_EVENT(pblk_chunk_state,
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state);
__field(int, state)
),
TP_fast_assign(
@ -98,7 +98,7 @@ TRACE_EVENT(pblk_line_state,
TP_STRUCT__entry(
__string(name, name)
__field(int, line)
__field(int, state);
__field(int, state)
),
TP_fast_assign(
@ -121,7 +121,7 @@ TRACE_EVENT(pblk_state,
TP_STRUCT__entry(
__string(name, name)
__field(int, state);
__field(int, state)
),
TP_fast_assign(

View File

@ -28,7 +28,7 @@ TRACE_EVENT(fjes_hw_issue_request_command,
__field(u8, cs_busy)
__field(u8, cs_complete)
__field(int, timeout)
__field(int, ret);
__field(int, ret)
),
TP_fast_assign(
__entry->cr_req = cr->bits.req_code;

View File

@ -239,7 +239,7 @@ TRACE_EVENT(ath10k_wmi_dbglog,
TP_STRUCT__entry(
__string(device, dev_name(ar->dev))
__string(driver, dev_driver_string(ar->dev))
__field(u8, hw_type);
__field(u8, hw_type)
__field(size_t, buf_len)
__dynamic_array(u8, buf, buf_len)
),
@ -269,7 +269,7 @@ TRACE_EVENT(ath10k_htt_pktlog,
TP_STRUCT__entry(
__string(device, dev_name(ar->dev))
__string(driver, dev_driver_string(ar->dev))
__field(u8, hw_type);
__field(u8, hw_type)
__field(u16, buf_len)
__dynamic_array(u8, pktlog, buf_len)
),
@ -435,7 +435,7 @@ TRACE_EVENT(ath10k_htt_rx_desc,
TP_STRUCT__entry(
__string(device, dev_name(ar->dev))
__string(driver, dev_driver_string(ar->dev))
__field(u8, hw_type);
__field(u8, hw_type)
__field(u16, len)
__dynamic_array(u8, rxdesc, len)
),

View File

@ -329,7 +329,7 @@ TRACE_EVENT(xchk_btree_op_error,
__field(int, level)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
__field(int, ptr);
__field(int, ptr)
__field(int, error)
__field(void *, ret_ip)
),
@ -414,7 +414,7 @@ TRACE_EVENT(xchk_btree_error,
__field(int, level)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
__field(int, ptr);
__field(int, ptr)
__field(void *, ret_ip)
),
TP_fast_assign(
@ -452,7 +452,7 @@ TRACE_EVENT(xchk_ifork_btree_error,
__field(int, level)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
__field(int, ptr);
__field(int, ptr)
__field(void *, ret_ip)
),
TP_fast_assign(

View File

@ -218,8 +218,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(void *, leaf);
__field(int, pos);
__field(void *, leaf)
__field(int, pos)
__field(xfs_fileoff_t, startoff)
__field(xfs_fsblock_t, startblock)
__field(xfs_filblks_t, blockcount)

View File

@ -849,13 +849,9 @@ extern int module_sysfs_initialized;
#define __MODULE_STRING(x) __stringify(x)
#ifdef CONFIG_STRICT_MODULE_RWX
extern void set_all_modules_text_rw(void);
extern void set_all_modules_text_ro(void);
extern void module_enable_ro(const struct module *mod, bool after_init);
extern void module_disable_ro(const struct module *mod);
#else
static inline void set_all_modules_text_rw(void) { }
static inline void set_all_modules_text_ro(void) { }
static inline void module_enable_ro(const struct module *mod, bool after_init) { }
static inline void module_disable_ro(const struct module *mod) { }
#endif

View File

@ -192,6 +192,22 @@ enum trace_reg {
struct trace_event_call;
#define TRACE_FUNCTION_TYPE ((const char *)~0UL)
struct trace_event_fields {
const char *type;
union {
struct {
const char *name;
const int size;
const int align;
const int is_signed;
const int filter_type;
};
int (*define_fields)(struct trace_event_call *);
};
};
struct trace_event_class {
const char *system;
void *probe;
@ -200,7 +216,7 @@ struct trace_event_class {
#endif
int (*reg)(struct trace_event_call *event,
enum trace_reg type, void *data);
int (*define_fields)(struct trace_event_call *);
struct trace_event_fields *fields_array;
struct list_head *(*get_fields)(struct trace_event_call *);
struct list_head fields;
int (*raw_init)(struct trace_event_call *);

View File

@ -85,7 +85,7 @@ TRACE_EVENT(file_check_and_advance_wb_err,
TP_ARGS(file, old),
TP_STRUCT__entry(
__field(struct file *, file);
__field(struct file *, file)
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
__field(errseq_t, old)

View File

@ -400,22 +400,16 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#undef __field_ext
#define __field_ext(type, item, filter_type) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#define __field_ext(_type, _item, _filter_type) { \
.type = #_type, .name = #_item, \
.size = sizeof(_type), .align = __alignof__(_type), \
.is_signed = is_signed_type(_type), .filter_type = _filter_type },
#undef __field_struct_ext
#define __field_struct_ext(type, item, filter_type) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
0, filter_type); \
if (ret) \
return ret;
#define __field_struct_ext(_type, _item, _filter_type) { \
.type = #_type, .name = #_item, \
.size = sizeof(_type), .align = __alignof__(_type), \
0, .filter_type = _filter_type },
#undef __field
#define __field(type, item) __field_ext(type, item, FILTER_OTHER)
@ -424,25 +418,16 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER)
#undef __array
#define __array(type, item, len) \
do { \
char *type_str = #type"["__stringify(len)"]"; \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
BUILD_BUG_ON(len <= 0); \
ret = trace_define_field(event_call, type_str, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), FILTER_OTHER); \
if (ret) \
return ret; \
} while (0);
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
.size = sizeof(_type[_len]), .align = __alignof__(_type), \
.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
#undef __dynamic_array
#define __dynamic_array(type, item, len) \
ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \
offsetof(typeof(field), __data_loc_##item), \
sizeof(field.__data_loc_##item), \
is_signed_type(type), FILTER_OTHER);
#define __dynamic_array(_type, _item, _len) { \
.type = "__data_loc " #_type "[]", .name = #_item, \
.size = 4, .align = 4, \
.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
@ -452,16 +437,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
static int notrace __init \
trace_event_define_fields_##call(struct trace_event_call *event_call) \
{ \
struct trace_event_raw_##call field; \
int ret; \
\
tstruct; \
\
return ret; \
}
static struct trace_event_fields trace_event_fields_##call[] = { \
tstruct \
{} };
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args)
@ -619,7 +597,7 @@ static inline notrace int trace_event_get_offsets_##call( \
*
* static struct trace_event_class __used event_class_<template> = {
* .system = "<system>",
* .define_fields = trace_event_define_fields_<call>,
* .fields_array = trace_event_fields_<call>,
* .fields = LIST_HEAD_INIT(event_class_##call.fields),
* .raw_init = trace_event_raw_init,
* .probe = trace_event_raw_event_##call,
@ -768,7 +746,7 @@ _TRACE_PERF_PROTO(call, PARAMS(proto)); \
static char print_fmt_##call[] = print; \
static struct trace_event_class __used __refdata event_class_##call = { \
.system = TRACE_SYSTEM_STRING, \
.define_fields = trace_event_define_fields_##call, \
.fields_array = trace_event_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_##call.fields),\
.raw_init = trace_event_raw_init, \
.probe = trace_event_raw_event_##call, \

View File

@ -510,6 +510,8 @@ static void do_unoptimize_kprobes(void)
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
/* Disarm probes if marked disabled */
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
@ -610,6 +612,18 @@ void wait_for_kprobe_optimizer(void)
mutex_unlock(&kprobe_mutex);
}
static bool optprobe_queued_unopt(struct optimized_kprobe *op)
{
struct optimized_kprobe *_op;
list_for_each_entry(_op, &unoptimizing_list, list) {
if (op == _op)
return true;
}
return false;
}
/* Optimize kprobe if p is ready to be optimized */
static void optimize_kprobe(struct kprobe *p)
{
@ -631,17 +645,21 @@ static void optimize_kprobe(struct kprobe *p)
return;
/* Check if it is already optimized. */
if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
if (optprobe_queued_unopt(op)) {
/* This is under unoptimizing. Just dequeue the probe */
list_del_init(&op->list);
}
return;
}
op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
if (!list_empty(&op->list))
/* This is under unoptimizing. Just dequeue the probe */
list_del_init(&op->list);
else {
list_add(&op->list, &optimizing_list);
kick_kprobe_optimizer();
}
/* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */
if (WARN_ON_ONCE(!list_empty(&op->list)))
return;
list_add(&op->list, &optimizing_list);
kick_kprobe_optimizer();
}
/* Short cut to direct unoptimizing */
@ -649,6 +667,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}
@ -662,31 +681,33 @@ static void unoptimize_kprobe(struct kprobe *p, bool force)
return; /* This is not an optprobe nor optimized */
op = container_of(p, struct optimized_kprobe, kp);
if (!kprobe_optimized(p)) {
/* Unoptimized or unoptimizing case */
if (force && !list_empty(&op->list)) {
/*
* Only if this is unoptimizing kprobe and forced,
* forcibly unoptimize it. (No need to unoptimize
* unoptimized kprobe again :)
*/
if (!kprobe_optimized(p))
return;
if (!list_empty(&op->list)) {
if (optprobe_queued_unopt(op)) {
/* Queued in unoptimizing queue */
if (force) {
/*
* Forcibly unoptimize the kprobe here, and queue it
* in the freeing list for release afterwards.
*/
force_unoptimize_kprobe(op);
list_move(&op->list, &freeing_list);
}
} else {
/* Dequeue from the optimizing queue */
list_del_init(&op->list);
force_unoptimize_kprobe(op);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
}
return;
}
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (!list_empty(&op->list)) {
/* Dequeue from the optimization queue */
list_del_init(&op->list);
return;
}
/* Optimized kprobe case */
if (force)
if (force) {
/* Forcibly update the code: this is a special case */
force_unoptimize_kprobe(op);
else {
} else {
list_add(&op->list, &unoptimizing_list);
kick_kprobe_optimizer();
}

View File

@ -2031,49 +2031,6 @@ static void module_enable_nx(const struct module *mod)
frob_writable_data(&mod->init_layout, set_memory_nx);
}
/* Iterate through all modules and set each module's text as RW */
void set_all_modules_text_rw(void)
{
struct module *mod;
if (!rodata_enabled)
return;
mutex_lock(&module_mutex);
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
frob_text(&mod->core_layout, set_memory_rw);
frob_text(&mod->init_layout, set_memory_rw);
}
mutex_unlock(&module_mutex);
}
/* Iterate through all modules and set each module's text as RO */
void set_all_modules_text_ro(void)
{
struct module *mod;
if (!rodata_enabled)
return;
mutex_lock(&module_mutex);
list_for_each_entry_rcu(mod, &modules, list) {
/*
* Ignore going modules since it's possible that ro
* protection has already been disabled, otherwise we'll
* run into protection faults at module deallocation.
*/
if (mod->state == MODULE_STATE_UNFORMED ||
mod->state == MODULE_STATE_GOING)
continue;
frob_text(&mod->core_layout, set_memory_ro);
frob_text(&mod->init_layout, set_memory_ro);
}
mutex_unlock(&module_mutex);
}
#else /* !CONFIG_STRICT_MODULE_RWX */
static void module_enable_nx(const struct module *mod) { }
#endif /* CONFIG_STRICT_MODULE_RWX */

View File

@ -52,6 +52,9 @@ enum trace_type {
#undef __field
#define __field(type, item) type item;
#undef __field_fn
#define __field_fn(type, item) type item;
#undef __field_struct
#define __field_struct(type, item) __field(type, item)
@ -71,26 +74,22 @@ enum trace_type {
#define F_STRUCT(args...) args
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
struct struct_name { \
struct trace_entry ent; \
tstruct \
}
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter)
#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
filter, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
#undef FTRACE_ENTRY_PACKED
#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print, \
filter) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter) __packed
#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed
#include "trace_entries.h"
@ -1917,17 +1916,15 @@ extern void tracing_log_err(struct trace_array *tr,
#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str))
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
extern struct trace_event_call \
__aligned(4) event_##call;
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
#undef FTRACE_ENTRY_PACKED
#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
#include "trace_entries.h"

View File

@ -61,15 +61,13 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
TRACE_FN,
F_STRUCT(
__field( unsigned long, ip )
__field( unsigned long, parent_ip )
__field_fn( unsigned long, ip )
__field_fn( unsigned long, parent_ip )
),
F_printk(" %ps <-- %ps",
(void *)__entry->ip, (void *)__entry->parent_ip),
FILTER_TRACE_FN,
perf_ftrace_event_register
);
@ -84,9 +82,7 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
__field_desc( int, graph_ent, depth )
),
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth),
FILTER_OTHER
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
);
/* Function return entry */
@ -97,18 +93,16 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
__field_desc( unsigned long, ret, func )
__field_desc( unsigned long, ret, overrun )
__field_desc( unsigned long long, ret, calltime)
__field_desc( unsigned long long, ret, rettime )
__field_desc( unsigned long, ret, overrun )
__field_desc( int, ret, depth )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
(void *)__entry->func, __entry->depth,
__entry->calltime, __entry->rettime,
__entry->depth),
FILTER_OTHER
__entry->depth)
);
/*
@ -137,9 +131,7 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu),
FILTER_OTHER
__entry->next_cpu)
);
/*
@ -157,9 +149,7 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu),
FILTER_OTHER
__entry->next_cpu)
);
/*
@ -183,9 +173,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
(void *)__entry->caller[0], (void *)__entry->caller[1],
(void *)__entry->caller[2], (void *)__entry->caller[3],
(void *)__entry->caller[4], (void *)__entry->caller[5],
(void *)__entry->caller[6], (void *)__entry->caller[7]),
FILTER_OTHER
(void *)__entry->caller[6], (void *)__entry->caller[7])
);
FTRACE_ENTRY(user_stack, userstack_entry,
@ -203,9 +191,7 @@ FTRACE_ENTRY(user_stack, userstack_entry,
(void *)__entry->caller[0], (void *)__entry->caller[1],
(void *)__entry->caller[2], (void *)__entry->caller[3],
(void *)__entry->caller[4], (void *)__entry->caller[5],
(void *)__entry->caller[6], (void *)__entry->caller[7]),
FILTER_OTHER
(void *)__entry->caller[6], (void *)__entry->caller[7])
);
/*
@ -222,9 +208,7 @@ FTRACE_ENTRY(bprint, bprint_entry,
),
F_printk("%ps: %s",
(void *)__entry->ip, __entry->fmt),
FILTER_OTHER
(void *)__entry->ip, __entry->fmt)
);
FTRACE_ENTRY_REG(print, print_entry,
@ -239,8 +223,6 @@ FTRACE_ENTRY_REG(print, print_entry,
F_printk("%ps: %s",
(void *)__entry->ip, __entry->buf),
FILTER_OTHER,
ftrace_event_register
);
@ -254,9 +236,7 @@ FTRACE_ENTRY(raw_data, raw_data_entry,
),
F_printk("id:%04x %08x",
__entry->id, (int)__entry->buf[0]),
FILTER_OTHER
__entry->id, (int)__entry->buf[0])
);
FTRACE_ENTRY(bputs, bputs_entry,
@ -269,9 +249,7 @@ FTRACE_ENTRY(bputs, bputs_entry,
),
F_printk("%ps: %s",
(void *)__entry->ip, __entry->str),
FILTER_OTHER
(void *)__entry->ip, __entry->str)
);
FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
@ -283,16 +261,14 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
__field_desc( resource_size_t, rw, phys )
__field_desc( unsigned long, rw, value )
__field_desc( unsigned long, rw, pc )
__field_desc( int, rw, map_id )
__field_desc( int, rw, map_id )
__field_desc( unsigned char, rw, opcode )
__field_desc( unsigned char, rw, width )
),
F_printk("%lx %lx %lx %d %x %x",
(unsigned long)__entry->phys, __entry->value, __entry->pc,
__entry->map_id, __entry->opcode, __entry->width),
FILTER_OTHER
__entry->map_id, __entry->opcode, __entry->width)
);
FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
@ -304,15 +280,13 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
__field_desc( resource_size_t, map, phys )
__field_desc( unsigned long, map, virt )
__field_desc( unsigned long, map, len )
__field_desc( int, map, map_id )
__field_desc( int, map, map_id )
__field_desc( unsigned char, map, opcode )
),
F_printk("%lx %lx %lx %d %x",
(unsigned long)__entry->phys, __entry->virt, __entry->len,
__entry->map_id, __entry->opcode),
FILTER_OTHER
__entry->map_id, __entry->opcode)
);
@ -334,9 +308,7 @@ FTRACE_ENTRY(branch, trace_branch,
F_printk("%u:%s:%s (%u)%s",
__entry->line,
__entry->func, __entry->file, __entry->correct,
__entry->constant ? " CONSTANT" : ""),
FILTER_OTHER
__entry->constant ? " CONSTANT" : "")
);
@ -362,7 +334,5 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
__entry->duration,
__entry->outer_duration,
__entry->nmi_total_ts,
__entry->nmi_count),
FILTER_OTHER
__entry->nmi_count)
);

View File

@ -24,6 +24,7 @@
#include <linux/delay.h>
#include <trace/events/sched.h>
#include <trace/syscall.h>
#include <asm/setup.h>
@ -2017,7 +2018,24 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
*/
head = trace_get_fields(call);
if (list_empty(head)) {
ret = call->class->define_fields(call);
struct trace_event_fields *field = call->class->fields_array;
unsigned int offset = sizeof(struct trace_entry);
for (; field->type; field++) {
if (field->type == TRACE_FUNCTION_TYPE) {
ret = field->define_fields(call);
break;
}
offset = ALIGN(offset, field->align);
ret = trace_define_field(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type);
if (ret)
break;
offset += field->size;
}
if (ret < 0) {
pr_warn("Could not initialize trace point events/%s\n",
name);

View File

@ -1155,6 +1155,12 @@ static struct synth_event *find_synth_event(const char *name)
return NULL;
}
static struct trace_event_fields synth_event_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = synth_event_define_fields },
{}
};
static int register_synth_event(struct synth_event *event)
{
struct trace_event_call *call = &event->call;
@ -1176,7 +1182,7 @@ static int register_synth_event(struct synth_event *event)
INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &synth_event_funcs;
call->class->define_fields = synth_event_define_fields;
call->class->fields_array = synth_event_fields_array;
ret = register_trace_event(&call->event);
if (!ret) {

View File

@ -29,10 +29,8 @@ static int ftrace_event_register(struct trace_event_call *call,
* function and thus become accesible via perf.
*/
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
filter, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
/* not needed for this file */
#undef __field_struct
@ -41,6 +39,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __field
#define __field(type, item) type item;
#undef __field_fn
#define __field_fn(type, item) type item;
#undef __field_desc
#define __field_desc(type, container, item) type item;
@ -60,7 +61,7 @@ static int ftrace_event_register(struct trace_event_call *call,
#define F_printk(fmt, args...) fmt, args
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
struct ____ftrace_##name { \
tstruct \
}; \
@ -73,76 +74,46 @@ static void __always_unused ____ftrace_check_##name(void) \
}
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
#include "trace_entries.h"
#undef __field_ext
#define __field_ext(_type, _item, _filter_type) { \
.type = #_type, .name = #_item, \
.size = sizeof(_type), .align = __alignof__(_type), \
is_signed_type(_type), .filter_type = _filter_type },
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER)
#undef __field_fn
#define __field_fn(_type, _item) __field_ext(_type, _item, FILTER_TRACE_FN)
#undef __field_desc
#define __field_desc(type, container, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), \
container.item), \
sizeof(field.container.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER)
#undef __array
#define __array(type, item, len) \
do { \
char *type_str = #type"["__stringify(len)"]"; \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, type_str, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret; \
} while (0);
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
.size = sizeof(_type[_len]), .align = __alignof__(_type), \
is_signed_type(_type), .filter_type = FILTER_OTHER },
#undef __array_desc
#define __array_desc(type, container, item, len) \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), \
container.item), \
sizeof(field.container.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
#undef __dynamic_array
#define __dynamic_array(type, item) \
ret = trace_define_field(event_call, #type "[]", #item, \
offsetof(typeof(field), item), \
0, is_signed_type(type), filter_type);\
if (ret) \
return ret;
#define __dynamic_array(_type, _item) { \
.type = #_type "[]", .name = #_item, \
.size = 0, .align = __alignof__(_type), \
is_signed_type(_type), .filter_type = FILTER_OTHER },
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
static int __init \
ftrace_define_fields_##name(struct trace_event_call *event_call) \
{ \
struct struct_name field; \
int ret; \
int filter_type = filter; \
\
tstruct; \
\
return ret; \
}
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
static struct trace_event_fields ftrace_event_fields_##name[] = { \
tstruct \
{} };
#include "trace_entries.h"
@ -152,6 +123,9 @@ ftrace_define_fields_##name(struct trace_event_call *event_call) \
#undef __field
#define __field(type, item)
#undef __field_fn
#define __field_fn(type, item)
#undef __field_desc
#define __field_desc(type, container, item)
@ -168,12 +142,10 @@ ftrace_define_fields_##name(struct trace_event_call *event_call) \
#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args)
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
regfn) \
\
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, regfn) \
static struct trace_event_class __refdata event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields_array = ftrace_event_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
.reg = regfn, \
}; \
@ -191,9 +163,9 @@ static struct trace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \
#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
FTRACE_ENTRY_REG(call, struct_name, etype, \
PARAMS(tstruct), PARAMS(print), filter, NULL)
PARAMS(tstruct), PARAMS(print), NULL)
bool ftrace_event_is_function(struct trace_event_call *call)
{

View File

@ -1555,16 +1555,28 @@ static struct trace_event_functions kprobe_funcs = {
.trace = print_kprobe_event
};
static struct trace_event_fields kretprobe_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = kretprobe_event_define_fields },
{}
};
static struct trace_event_fields kprobe_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = kprobe_event_define_fields },
{}
};
static inline void init_trace_event_call(struct trace_kprobe *tk)
{
struct trace_event_call *call = trace_probe_event_call(&tk->tp);
if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
call->class->define_fields = kretprobe_event_define_fields;
call->class->fields_array = kretprobe_fields_array;
} else {
call->event.funcs = &kprobe_funcs;
call->class->define_fields = kprobe_event_define_fields;
call->class->fields_array = kprobe_fields_array;
}
call->flags = TRACE_EVENT_FL_KPROBE;

View File

@ -203,11 +203,10 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
extern char *__bad_type_size(void);
#define SYSCALL_FIELD(type, field, name) \
sizeof(type) != sizeof(trace.field) ? \
__bad_type_size() : \
#type, #name, offsetof(typeof(trace), field), \
sizeof(trace.field), is_signed_type(type)
#define SYSCALL_FIELD(_type, _name) { \
.type = #_type, .name = #_name, \
.size = sizeof(_type), .align = __alignof__(_type), \
.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }
static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@ -274,42 +273,23 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
struct syscall_trace_enter trace;
struct syscall_metadata *meta = call->data;
int ret;
int i;
int offset = offsetof(typeof(trace), args);
ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
FILTER_OTHER);
if (ret)
return ret;
int ret = 0;
int i;
for (i = 0; i < meta->nb_args; i++) {
ret = trace_define_field(call, meta->types[i],
meta->args[i], offset,
sizeof(unsigned long), 0,
FILTER_OTHER);
if (ret)
break;
offset += sizeof(unsigned long);
}
return ret;
}
static int __init syscall_exit_define_fields(struct trace_event_call *call)
{
struct syscall_trace_exit trace;
int ret;
ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
FILTER_OTHER);
if (ret)
return ret;
ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
FILTER_OTHER);
return ret;
}
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
@ -507,6 +487,13 @@ static int __init init_syscall_trace(struct trace_event_call *call)
return id;
}
static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
SYSCALL_FIELD(int, __syscall_nr),
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = syscall_enter_define_fields },
{}
};
struct trace_event_functions enter_syscall_print_funcs = {
.trace = print_syscall_enter,
};
@ -518,7 +505,7 @@ struct trace_event_functions exit_syscall_print_funcs = {
struct trace_event_class __refdata event_class_syscall_enter = {
.system = "syscalls",
.reg = syscall_enter_register,
.define_fields = syscall_enter_define_fields,
.fields_array = syscall_enter_fields_array,
.get_fields = syscall_get_enter_fields,
.raw_init = init_syscall_trace,
};
@ -526,7 +513,11 @@ struct trace_event_class __refdata event_class_syscall_enter = {
struct trace_event_class __refdata event_class_syscall_exit = {
.system = "syscalls",
.reg = syscall_exit_register,
.define_fields = syscall_exit_define_fields,
.fields_array = (struct trace_event_fields[]){
SYSCALL_FIELD(int, __syscall_nr),
SYSCALL_FIELD(long, ret),
{}
},
.fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
.raw_init = init_syscall_trace,
};

View File

@ -1528,12 +1528,17 @@ static struct trace_event_functions uprobe_funcs = {
.trace = print_uprobe_event
};
static struct trace_event_fields uprobe_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = uprobe_event_define_fields },
{}
};
static inline void init_trace_event_call(struct trace_uprobe *tu)
{
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
call->class->fields_array = uprobe_fields_array;
call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY;
call->class->reg = trace_uprobe_register;

View File

@ -408,20 +408,20 @@ TRACE_EVENT(drv_bss_info_changed,
__field(u32, basic_rates)
__array(int, mcast_rate, NUM_NL80211_BANDS)
__field(u16, ht_operation_mode)
__field(s32, cqm_rssi_thold);
__field(s32, cqm_rssi_hyst);
__field(u32, channel_width);
__field(u32, channel_cfreq1);
__field(s32, cqm_rssi_thold)
__field(s32, cqm_rssi_hyst)
__field(u32, channel_width)
__field(u32, channel_cfreq1)
__dynamic_array(u32, arp_addr_list,
info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
IEEE80211_BSS_ARP_ADDR_LIST_LEN :
info->arp_addr_cnt);
__field(int, arp_addr_cnt);
__field(bool, qos);
__field(bool, idle);
__field(bool, ps);
__dynamic_array(u8, ssid, info->ssid_len);
__field(bool, hidden_ssid);
info->arp_addr_cnt)
__field(int, arp_addr_cnt)
__field(bool, qos)
__field(bool, idle)
__field(bool, ps)
__dynamic_array(u8, ssid, info->ssid_len)
__field(bool, hidden_ssid)
__field(int, txpower)
__field(u8, p2p_oppps_ctwindow)
),
@ -1672,8 +1672,8 @@ TRACE_EVENT(drv_start_ap,
VIF_ENTRY
__field(u8, dtimper)
__field(u16, bcnint)
__dynamic_array(u8, ssid, info->ssid_len);
__field(bool, hidden_ssid);
__dynamic_array(u8, ssid, info->ssid_len)
__field(bool, hidden_ssid)
),
TP_fast_assign(
@ -1739,7 +1739,7 @@ TRACE_EVENT(drv_join_ibss,
VIF_ENTRY
__field(u8, dtimper)
__field(u16, bcnint)
__dynamic_array(u8, ssid, info->ssid_len);
__dynamic_array(u8, ssid, info->ssid_len)
),
TP_fast_assign(

View File

@ -2014,7 +2014,7 @@ TRACE_EVENT(rdev_start_nan,
WIPHY_ENTRY
WDEV_ENTRY
__field(u8, master_pref)
__field(u8, bands);
__field(u8, bands)
),
TP_fast_assign(
WIPHY_ASSIGN;
@ -2036,8 +2036,8 @@ TRACE_EVENT(rdev_nan_change_conf,
WIPHY_ENTRY
WDEV_ENTRY
__field(u8, master_pref)
__field(u8, bands);
__field(u32, changes);
__field(u8, bands)
__field(u32, changes)
),
TP_fast_assign(
WIPHY_ASSIGN;

View File

@ -197,7 +197,7 @@ $(OUTPUT)test-libcrypto.bin:
$(BUILD) -lcrypto
$(OUTPUT)test-gtk2.bin:
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) -Wno-deprecated-declarations
$(OUTPUT)test-gtk2-infobar.bin:
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)

View File

@ -1,9 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include "clang/Basic/Version.h"
#if CLANG_VERSION_MAJOR < 8
#include "clang/Basic/VirtualFileSystem.h"
#endif
#include "clang/Driver/Driver.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/ManagedStatic.h"
#if CLANG_VERSION_MAJOR >= 8
#include "llvm/Support/VirtualFileSystem.h"
#endif
#include "llvm/Support/raw_ostream.h"
using namespace clang;

View File

@ -15,6 +15,8 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, int bits);
int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
void bitmap_clear(unsigned long *map, unsigned int start, int len);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
@ -123,6 +125,15 @@ static inline unsigned long *bitmap_alloc(int nbits)
return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
}
/*
* bitmap_free - Free bitmap
* @bitmap: pointer to bitmap
*/
static inline void bitmap_free(unsigned long *bitmap)
{
free(bitmap);
}
/*
* bitmap_scnprintf - print bitmap list into buffer
* @bitmap: bitmap
@ -148,4 +159,23 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
return __bitmap_and(dst, src1, src2, nbits);
}
#ifdef __LITTLE_ENDIAN
#define BITMAP_MEM_ALIGNMENT 8
#else
#define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long))
#endif
#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1)
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
static inline int bitmap_equal(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT))
return !memcmp(src1, src2, nbits / 8);
return __bitmap_equal(src1, src2, nbits);
}
#endif /* _PERF_BITOPS_H */

View File

@ -17,7 +17,15 @@ int strtobool(const char *s, bool *res);
* However uClibc headers also define __GLIBC__ hence the hack below
*/
#if defined(__GLIBC__) && !defined(__UCLIBC__)
// pragma diagnostic was introduced in gcc 4.6
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wredundant-decls"
#endif
extern size_t strlcpy(char *dest, const char *src, size_t size);
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC diagnostic pop
#endif
#endif
char *str_error_r(int errnum, char *buf, size_t buflen);

View File

@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs)
size_t name_len = strlen(fs->name);
/* name + "_PATH" + '\0' */
char upper_name[name_len + 5 + 1];
memcpy(upper_name, fs->name, name_len);
mem_toupper(upper_name, name_len);
strcpy(&upper_name[name_len], "_PATH");
@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs)
return false;
fs->found = true;
strncpy(fs->path, override_path, sizeof(fs->path));
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
fs->path[sizeof(fs->path) - 1] = '\0';
return true;
}

View File

@ -71,3 +71,18 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
BITMAP_LAST_WORD_MASK(bits));
return result != 0;
}
int __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{
unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] != bitmap2[k])
return 0;
if (bits % BITS_PER_LONG)
if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
return 0;
return 1;
}

View File

@ -0,0 +1,156 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/perf/Documentation/Makefile
include ../../../scripts/Makefile.include
include ../../../scripts/utilities.mak
MAN3_TXT = libperf.txt
MAN7_TXT = libperf-counting.txt libperf-sampling.txt
MAN_EX = examples/*.c
MAN_TXT = $(MAN3_TXT) $(MAN7_TXT)
_MAN_XML = $(patsubst %.txt,%.xml,$(MAN_TXT))
_MAN_HTML = $(patsubst %.txt,%.html,$(MAN_TXT))
_MAN_3 = $(patsubst %.txt,%.3,$(MAN3_TXT))
_MAN_7 = $(patsubst %.txt,%.7,$(MAN7_TXT))
MAN_XML = $(addprefix $(OUTPUT),$(_MAN_XML))
MAN_HTML = $(addprefix $(OUTPUT),$(_MAN_HTML))
MAN_3 = $(addprefix $(OUTPUT),$(_MAN_3))
MAN_7 = $(addprefix $(OUTPUT),$(_MAN_7))
MAN_X = $(MAN_3) $(MAN_7)
# Make the path relative to DESTDIR, not prefix
ifndef DESTDIR
prefix ?=$(HOME)
endif
mandir ?= $(prefix)/share/man
man3dir = $(mandir)/man3
man7dir = $(mandir)/man7
docdir ?= $(prefix)/share/doc/libperf
htmldir = $(docdir)/html
exdir = $(docdir)/examples
ASCIIDOC = asciidoc
ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
XMLTO =xmlto
INSTALL ?= install
RM ?= rm -f
# For asciidoc ...
# -7.1.2, no extra settings are needed.
# 8.0-, set ASCIIDOC8.
#
# For docbook-xsl ...
# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
# 1.69.0, no extra settings are needed?
# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
# 1.71.1, no extra settings are needed?
# 1.72.0, set DOCBOOK_XSL_172.
# 1.73.0-, set ASCIIDOC_NO_ROFF
# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
# of 'the ".ft C" problem' in your generated manpages, and you
# instead ended up with weird characters around callouts, try
# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
ifdef ASCIIDOC8
ASCIIDOC_EXTRA += -a asciidoc7compatible
endif
ifdef DOCBOOK_XSL_172
ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff
MANPAGE_XSL = manpage-1.72.xsl
else
ifdef ASCIIDOC_NO_ROFF
# docbook-xsl after 1.72 needs the regular XSL, but will not
# pass-thru raw roff codes from asciidoc.conf, so turn them off.
ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff
endif
endif
ifdef MAN_BOLD_LITERAL
XMLTO_EXTRA += -m manpage-bold-literal.xsl
endif
ifdef DOCBOOK_SUPPRESS_SP
XMLTO_EXTRA += -m manpage-suppress-sp.xsl
endif
DESTDIR ?=
DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
export DESTDIR DESTDIR_SQ
# Please note that there is a minor bug in asciidoc.
# The version after 6.0.3 _will_ include the patch found here:
# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2
#
# Until that version is released you may have to apply the patch
# yourself - yes, all 6 characters of it!
QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
QUIET_SUBDIR1 =
ifneq ($(findstring $(MAKEFLAGS),w),w)
PRINT_DIR = --no-print-directory
else # "make -w"
NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifneq ($(V),1)
QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
QUIET_XMLTO = @echo ' XMLTO '$@;
endif
endif
all: $(MAN_X) $(MAN_HTML)
$(MAN_HTML) $(MAN_X): asciidoc.conf
install-man: all
$(call QUIET_INSTALL, man) \
$(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \
$(INSTALL) -m 644 $(MAN_3) $(DESTDIR)$(man3dir); \
$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
$(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir);
install-html:
$(call QUIET_INSTALL, html) \
$(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \
$(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \
install-examples:
$(call QUIET_INSTALL, examples) \
$(INSTALL) -d -m 755 $(DESTDIR)$(exdir); \
$(INSTALL) -m 644 $(MAN_EX) $(DESTDIR)$(exdir); \
CLEAN_FILES = \
$(MAN_XML) $(addsuffix +,$(MAN_XML)) \
$(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
$(MAN_X)
clean:
$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
$(MAN_3): $(OUTPUT)%.3: %.xml
$(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(MAN_7): $(OUTPUT)%.7: %.xml
$(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(MAN_XML): $(OUTPUT)%.xml: %.txt
$(QUIET_ASCIIDOC)$(ASCIIDOC) -b docbook -d manpage \
$(ASCIIDOC_EXTRA) -alibperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
mv $@+ $@
$(MAN_HTML): $(OUTPUT)%.html: %.txt
$(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
mv $@+ $@

View File

@ -0,0 +1,120 @@
## linktep: macro
#
# Usage: linktep:command[manpage-section]
#
# Note, {0} is the manpage section, while {target} is the command.
#
# Show TEP link as: <command>(<section>); if section is defined, else just show
# the command.
[macros]
(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
[attributes]
asterisk=&#42;
plus=&#43;
caret=&#94;
startsb=&#91;
endsb=&#93;
tilde=&#126;
ifdef::backend-docbook[]
[linktep-inlinemacro]
{0%{target}}
{0#<citerefentry>}
{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
{0#</citerefentry>}
endif::backend-docbook[]
ifdef::backend-docbook[]
ifndef::tep-asciidoc-no-roff[]
# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
# v1.72 breaks with this because it replaces dots not in roff requests.
[listingblock]
<example><title>{title}</title>
<literallayout>
ifdef::doctype-manpage[]
&#10;.ft C&#10;
endif::doctype-manpage[]
|
ifdef::doctype-manpage[]
&#10;.ft&#10;
endif::doctype-manpage[]
</literallayout>
{title#}</example>
endif::tep-asciidoc-no-roff[]
ifdef::tep-asciidoc-no-roff[]
ifdef::doctype-manpage[]
# The following two small workarounds insert a simple paragraph after screen
[listingblock]
<example><title>{title}</title>
<literallayout>
|
</literallayout><simpara></simpara>
{title#}</example>
[verseblock]
<formalpara{id? id="{id}"}><title>{title}</title><para>
{title%}<literallayout{id? id="{id}"}>
{title#}<literallayout>
|
</literallayout>
{title#}</para></formalpara>
{title%}<simpara></simpara>
endif::doctype-manpage[]
endif::tep-asciidoc-no-roff[]
endif::backend-docbook[]
ifdef::doctype-manpage[]
ifdef::backend-docbook[]
[header]
template::[header-declarations]
<refentry>
<refmeta>
<refentrytitle>{mantitle}</refentrytitle>
<manvolnum>{manvolnum}</manvolnum>
<refmiscinfo class="source">libperf</refmiscinfo>
<refmiscinfo class="version">{libperf_version}</refmiscinfo>
<refmiscinfo class="manual">libperf Manual</refmiscinfo>
</refmeta>
<refnamediv>
<refname>{manname1}</refname>
<refname>{manname2}</refname>
<refname>{manname3}</refname>
<refname>{manname4}</refname>
<refname>{manname5}</refname>
<refname>{manname6}</refname>
<refname>{manname7}</refname>
<refname>{manname8}</refname>
<refname>{manname9}</refname>
<refname>{manname10}</refname>
<refname>{manname11}</refname>
<refname>{manname12}</refname>
<refname>{manname13}</refname>
<refname>{manname14}</refname>
<refname>{manname15}</refname>
<refname>{manname16}</refname>
<refname>{manname17}</refname>
<refname>{manname18}</refname>
<refname>{manname19}</refname>
<refname>{manname20}</refname>
<refname>{manname21}</refname>
<refname>{manname22}</refname>
<refname>{manname23}</refname>
<refname>{manname24}</refname>
<refname>{manname25}</refname>
<refname>{manname26}</refname>
<refname>{manname27}</refname>
<refname>{manname28}</refname>
<refname>{manname29}</refname>
<refname>{manname30}</refname>
<refpurpose>{manpurpose}</refpurpose>
</refnamediv>
endif::backend-docbook[]
endif::doctype-manpage[]
ifdef::backend-xhtml11[]
[linktep-inlinemacro]
<a href="{target}.html">{target}{0?({0})}</a>
endif::backend-xhtml11[]

View File

@ -0,0 +1,119 @@
#include <linux/perf_event.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <perf/mmap.h>
#include <perf/core.h>
#include <perf/event.h>
#include <stdio.h>
#include <unistd.h>
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
return vfprintf(stderr, fmt, ap);
}
union u64_swap {
__u64 val64;
__u32 val32[2];
};
int main(int argc, char **argv)
{
struct perf_evlist *evlist;
struct perf_evsel *evsel;
struct perf_mmap *map;
struct perf_cpu_map *cpus;
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.disabled = 1,
.freq = 1,
.sample_freq = 10,
.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
};
int err = -1;
union perf_event *event;
libperf_init(libperf_print);
cpus = perf_cpu_map__new(NULL);
if (!cpus) {
fprintf(stderr, "failed to create cpus\n");
return -1;
}
evlist = perf_evlist__new();
if (!evlist) {
fprintf(stderr, "failed to create evlist\n");
goto out_cpus;
}
evsel = perf_evsel__new(&attr);
if (!evsel) {
fprintf(stderr, "failed to create cycles\n");
goto out_cpus;
}
perf_evlist__add(evlist, evsel);
perf_evlist__set_maps(evlist, cpus, NULL);
err = perf_evlist__open(evlist);
if (err) {
fprintf(stderr, "failed to open evlist\n");
goto out_evlist;
}
err = perf_evlist__mmap(evlist, 4);
if (err) {
fprintf(stderr, "failed to mmap evlist\n");
goto out_evlist;
}
perf_evlist__enable(evlist);
sleep(3);
perf_evlist__disable(evlist);
perf_evlist__for_each_mmap(evlist, map, false) {
if (perf_mmap__read_init(map) < 0)
continue;
while ((event = perf_mmap__read_event(map)) != NULL) {
int cpu, pid, tid;
__u64 ip, period, *array;
union u64_swap u;
array = event->sample.array;
ip = *array;
array++;
u.val64 = *array;
pid = u.val32[0];
tid = u.val32[1];
array++;
u.val64 = *array;
cpu = u.val32[0];
array++;
period = *array;
fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
cpu, pid, tid, ip, period);
perf_mmap__consume(map);
}
perf_mmap__read_done(map);
}
out_evlist:
perf_evlist__delete(evlist);
out_cpus:
perf_cpu_map__put(cpus);
return err;
}

View File

@ -0,0 +1,211 @@
libperf-counting(7)
===================
NAME
----
libperf-counting - counting interface
DESCRIPTION
-----------
The counting interface provides API to meassure and get count for specific perf events.
The following test tries to explain count on `counting.c` example.
It is by no means complete guide to counting, but shows libperf basic API for counting.
The `counting.c` comes with libbperf package and can be compiled and run like:
[source,bash]
--
$ gcc -o counting counting.c -lperf
$ sudo ./counting
count 176792, enabled 176944, run 176944
count 176242, enabled 176242, run 176242
--
It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
which is available only for root.
The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
* creates events
* adds them to the event list
* opens and enables events through the event list
* does some workload
* disables events
* reads and displays event counts
* destroys the event list
The first thing you need to do before using libperf is to call init function:
[source,c]
--
8 static int libperf_print(enum libperf_print_level level,
9 const char *fmt, va_list ap)
10 {
11 return vfprintf(stderr, fmt, ap);
12 }
14 int main(int argc, char **argv)
15 {
...
35 libperf_init(libperf_print);
--
It will setup the library and sets function for debug output from library.
The `libperf_print` callback will receive any message with its debug level,
defined as:
[source,c]
--
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
--
Once the setup is complete we start by defining specific events using the `struct perf_event_attr`.
We create software events for cpu and task:
[source,c]
--
20 struct perf_event_attr attr1 = {
21 .type = PERF_TYPE_SOFTWARE,
22 .config = PERF_COUNT_SW_CPU_CLOCK,
23 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
24 .disabled = 1,
25 };
26 struct perf_event_attr attr2 = {
27 .type = PERF_TYPE_SOFTWARE,
28 .config = PERF_COUNT_SW_TASK_CLOCK,
29 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
30 .disabled = 1,
31 };
--
The `read_format` setup tells perf to include timing details together with each count.
Next step is to prepare threads map.
In this case we will monitor current process, so we create threads map with single pid (0):
[source,c]
--
37 threads = perf_thread_map__new_dummy();
38 if (!threads) {
39 fprintf(stderr, "failed to create threads\n");
40 return -1;
41 }
42
43 perf_thread_map__set_pid(threads, 0, 0);
--
Now we create libperf's event list, which will serve as holder for the events we want:
[source,c]
--
45 evlist = perf_evlist__new();
46 if (!evlist) {
47 fprintf(stderr, "failed to create evlist\n");
48 goto out_threads;
49 }
--
We create libperf's events for the attributes we defined earlier and add them to the list:
[source,c]
--
51 evsel = perf_evsel__new(&attr1);
52 if (!evsel) {
53 fprintf(stderr, "failed to create evsel1\n");
54 goto out_evlist;
55 }
56
57 perf_evlist__add(evlist, evsel);
58
59 evsel = perf_evsel__new(&attr2);
60 if (!evsel) {
61 fprintf(stderr, "failed to create evsel2\n");
62 goto out_evlist;
63 }
64
65 perf_evlist__add(evlist, evsel);
--
Configure event list with the thread map and open events:
[source,c]
--
67 perf_evlist__set_maps(evlist, NULL, threads);
68
69 err = perf_evlist__open(evlist);
70 if (err) {
71 fprintf(stderr, "failed to open evsel\n");
72 goto out_evlist;
73 }
--
Both events are created as disabled (note the `disabled = 1` assignment above),
so we need to enable the whole list explicitely (both events).
From this moment events are counting and we can do our workload.
When we are done we disable the events list.
[source,c]
--
75 perf_evlist__enable(evlist);
76
77 while (count--);
78
79 perf_evlist__disable(evlist);
--
Now we need to get the counts from events, following code iterates throught the events list and read counts:
[source,c]
--
81 perf_evlist__for_each_evsel(evlist, evsel) {
82 perf_evsel__read(evsel, 0, 0, &counts);
83 fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
84 counts.val, counts.ena, counts.run);
85 }
--
And finaly cleanup.
We close the whole events list (both events) and remove it together with the threads map:
[source,c]
--
87 perf_evlist__close(evlist);
88
89 out_evlist:
90 perf_evlist__delete(evlist);
91 out_threads:
92 perf_thread_map__put(threads);
93 return err;
94 }
--
REPORTING BUGS
--------------
Report bugs to <linux-perf-users@vger.kernel.org>.
LICENSE
-------
libperf is Free Software licensed under the GNU LGPL 2.1
RESOURCES
---------
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
SEE ALSO
--------
libperf(3), libperf-sampling(7)

View File

@ -0,0 +1,243 @@
libperf-sampling(7)
===================
NAME
----
libperf-sampling - sampling interface
DESCRIPTION
-----------
The sampling interface provides API to meassure and get count for specific perf events.
The following test tries to explain count on `sampling.c` example.
It is by no means complete guide to sampling, but shows libperf basic API for sampling.
The `sampling.c` comes with libbperf package and can be compiled and run like:
[source,bash]
--
$ gcc -o sampling sampling.c -lperf
$ sudo ./sampling
cpu 0, pid 0, tid 0, ip ffffffffad06c4e6, period 1
cpu 0, pid 4465, tid 4469, ip ffffffffad118748, period 18322959
cpu 0, pid 0, tid 0, ip ffffffffad115722, period 33544846
cpu 0, pid 4465, tid 4470, ip 7f84fe0cdad6, period 23687474
cpu 0, pid 0, tid 0, ip ffffffffad9e0349, period 34255790
cpu 0, pid 4465, tid 4469, ip ffffffffad136581, period 38664069
cpu 0, pid 0, tid 0, ip ffffffffad9e55e2, period 21922384
cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 17655175
...
--
It requires root access, because it uses hardware cycles event.
The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
- creates events
- adds them to the event list
- opens and enables events through the event list
- sleeps for 3 seconds
- disables events
- reads and displays recorded samples
- destroys the event list
The first thing you need to do before using libperf is to call init function:
[source,c]
--
12 static int libperf_print(enum libperf_print_level level,
13 const char *fmt, va_list ap)
14 {
15 return vfprintf(stderr, fmt, ap);
16 }
23 int main(int argc, char **argv)
24 {
...
40 libperf_init(libperf_print);
--
It will setup the library and sets function for debug output from library.
The `libperf_print` callback will receive any message with its debug level,
defined as:
[source,c]
--
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
--
Once the setup is complete we start by defining cycles event using the `struct perf_event_attr`:
[source,c]
--
29 struct perf_event_attr attr = {
30 .type = PERF_TYPE_HARDWARE,
31 .config = PERF_COUNT_HW_CPU_CYCLES,
32 .disabled = 1,
33 .freq = 1,
34 .sample_freq = 10,
35 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
36 };
--
Next step is to prepare cpus map.
In this case we will monitor all the available CPUs:
[source,c]
--
42 cpus = perf_cpu_map__new(NULL);
43 if (!cpus) {
44 fprintf(stderr, "failed to create cpus\n");
45 return -1;
46 }
--
Now we create libperf's event list, which will serve as holder for the cycles event:
[source,c]
--
48 evlist = perf_evlist__new();
49 if (!evlist) {
50 fprintf(stderr, "failed to create evlist\n");
51 goto out_cpus;
52 }
--
We create libperf's event for the cycles attribute we defined earlier and add it to the list:
[source,c]
--
54 evsel = perf_evsel__new(&attr);
55 if (!evsel) {
56 fprintf(stderr, "failed to create cycles\n");
57 goto out_cpus;
58 }
59
60 perf_evlist__add(evlist, evsel);
--
Configure event list with the cpus map and open event:
[source,c]
--
62 perf_evlist__set_maps(evlist, cpus, NULL);
63
64 err = perf_evlist__open(evlist);
65 if (err) {
66 fprintf(stderr, "failed to open evlist\n");
67 goto out_evlist;
68 }
--
Once the events list is open, we can create memory maps AKA perf ring buffers:
[source,c]
--
70 err = perf_evlist__mmap(evlist, 4);
71 if (err) {
72 fprintf(stderr, "failed to mmap evlist\n");
73 goto out_evlist;
74 }
--
The event is created as disabled (note the `disabled = 1` assignment above),
so we need to enable the events list explicitely.
From this moment the cycles event is sampling.
We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
[source,c]
--
76 perf_evlist__enable(evlist);
77 sleep(3);
78 perf_evlist__disable(evlist);
--
Following code walks through the ring buffers and reads stored events/samples:
[source,c]
--
80 perf_evlist__for_each_mmap(evlist, map, false) {
81 if (perf_mmap__read_init(map) < 0)
82 continue;
83
84 while ((event = perf_mmap__read_event(map)) != NULL) {
/* process event */
108 perf_mmap__consume(map);
109 }
110 perf_mmap__read_done(map);
111 }
--
Each sample needs to get parsed:
[source,c]
--
85 int cpu, pid, tid;
86 __u64 ip, period, *array;
87 union u64_swap u;
88
89 array = event->sample.array;
90
91 ip = *array;
92 array++;
93
94 u.val64 = *array;
95 pid = u.val32[0];
96 tid = u.val32[1];
97 array++;
98
99 u.val64 = *array;
100 cpu = u.val32[0];
101 array++;
102
103 period = *array;
104
105 fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
106 cpu, pid, tid, ip, period);
--
And finaly cleanup.
We close the whole events list (both events) and remove it together with the threads map:
[source,c]
--
113 out_evlist:
114 perf_evlist__delete(evlist);
115 out_cpus:
116 perf_cpu_map__put(cpus);
117 return err;
118 }
--
REPORTING BUGS
--------------
Report bugs to <linux-perf-users@vger.kernel.org>.
LICENSE
-------
libperf is Free Software licensed under the GNU LGPL 2.1
RESOURCES
---------
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
SEE ALSO
--------
libperf(3), libperf-counting(7)

View File

@ -0,0 +1,246 @@
libperf(3)
==========
NAME
----
libperf - Linux kernel perf event library
SYNOPSIS
--------
*Generic API:*
[source,c]
--
#include <perf/core.h>
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
const char *, va_list ap);
void libperf_init(libperf_print_fn_t fn);
--
*API to handle cpu maps:*
[source,c]
--
#include <perf/cpumap.h>
struct perf_cpu_map;
struct perf_cpu_map *perf_cpu_map__dummy_new(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
void perf_cpu_map__put(struct perf_cpu_map *map);
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
bool perf_cpu_map__empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
*API to handle thread maps:*
[source,c]
--
#include <perf/threadmap.h>
struct perf_thread_map;
struct perf_thread_map *perf_thread_map__new_dummy(void);
void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
int perf_thread_map__nr(struct perf_thread_map *threads);
pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
void perf_thread_map__put(struct perf_thread_map *map);
--
*API to handle event lists:*
[source,c]
--
#include <perf/evlist.h>
struct perf_evlist;
void perf_evlist__add(struct perf_evlist *evlist,
struct perf_evsel *evsel);
void perf_evlist__remove(struct perf_evlist *evlist,
struct perf_evsel *evsel);
struct perf_evlist *perf_evlist__new(void);
void perf_evlist__delete(struct perf_evlist *evlist);
struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist,
struct perf_evsel *evsel);
int perf_evlist__open(struct perf_evlist *evlist);
void perf_evlist__close(struct perf_evlist *evlist);
void perf_evlist__enable(struct perf_evlist *evlist);
void perf_evlist__disable(struct perf_evlist *evlist);
#define perf_evlist__for_each_evsel(evlist, pos)
void perf_evlist__set_maps(struct perf_evlist *evlist,
struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
short revents_and_mask);
int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
struct perf_mmap *map,
bool overwrite);
#define perf_evlist__for_each_mmap(evlist, pos, overwrite)
--
*API to handle events:*
[source,c]
--
#include <perf/evsel.h>*
struct perf_evsel;
struct perf_counts_values {
union {
struct {
uint64_t val;
uint64_t ena;
uint64_t run;
};
uint64_t values[3];
};
};
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
int perf_evsel__disable(struct perf_evsel *evsel);
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
--
*API to handle maps (perf ring buffers):*
[source,c]
--
#include <perf/mmap.h>
struct perf_mmap;
void perf_mmap__consume(struct perf_mmap *map);
int perf_mmap__read_init(struct perf_mmap *map);
void perf_mmap__read_done(struct perf_mmap *map);
union perf_event *perf_mmap__read_event(struct perf_mmap *map);
--
*Structures to access perf API events:*
[source,c]
--
#include <perf/event.h>
struct perf_record_mmap;
struct perf_record_mmap2;
struct perf_record_comm;
struct perf_record_namespaces;
struct perf_record_fork;
struct perf_record_lost;
struct perf_record_lost_samples;
struct perf_record_read;
struct perf_record_throttle;
struct perf_record_ksymbol;
struct perf_record_bpf_event;
struct perf_record_sample;
struct perf_record_switch;
struct perf_record_header_attr;
struct perf_record_record_cpu_map;
struct perf_record_cpu_map_data;
struct perf_record_cpu_map;
struct perf_record_event_update_cpus;
struct perf_record_event_update_scale;
struct perf_record_event_update;
struct perf_trace_event_type;
struct perf_record_header_event_type;
struct perf_record_header_tracing_data;
struct perf_record_header_build_id;
struct perf_record_id_index;
struct perf_record_auxtrace_info;
struct perf_record_auxtrace;
struct perf_record_auxtrace_error;
struct perf_record_aux;
struct perf_record_itrace_start;
struct perf_record_thread_map_entry;
struct perf_record_thread_map;
struct perf_record_stat_config_entry;
struct perf_record_stat_config;
struct perf_record_stat;
struct perf_record_stat_round;
struct perf_record_time_conv;
struct perf_record_header_feature;
struct perf_record_compressed;
--
DESCRIPTION
-----------
The libperf library provides an API to access the linux kernel perf
events subsystem.
Following objects are key to the libperf interface:
[horizontal]
struct perf_cpu_map:: Provides a cpu list abstraction.
struct perf_thread_map:: Provides a thread list abstraction.
struct perf_evsel:: Provides an abstraction for single a perf event.
struct perf_evlist:: Gathers several struct perf_evsel object and performs functions on all of them.
struct perf_mmap:: Provides an abstraction for accessing perf ring buffer.
The exported API functions bind these objects together.
REPORTING BUGS
--------------
Report bugs to <linux-perf-users@vger.kernel.org>.
LICENSE
-------
libperf is Free Software licensed under the GNU LGPL 2.1
RESOURCES
---------
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
SEE ALSO
--------
libperf-sampling(7), libperf-counting(7)

View File

@ -0,0 +1,14 @@
<!-- manpage-1.72.xsl:
special settings for manpages rendered from asciidoc+docbook
handles peculiarities in docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the special values for the roff control characters
needed for docbook-xsl 1.72.0 -->
<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
</xsl:stylesheet>

View File

@ -0,0 +1,35 @@
<!-- manpage-base.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- these params silence some output from xmlto -->
<xsl:param name="man.output.quietly" select="1"/>
<xsl:param name="refentry.meta.get.quietly" select="1"/>
<!-- convert asciidoc callouts to man page format;
git.docbook.backslash and git.docbook.dot params
must be supplied by another XSL file or other means -->
<xsl:template match="co">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB(',
substring-after(@id,'-'),')',
$git.docbook.backslash,'fR')"/>
</xsl:template>
<xsl:template match="calloutlist">
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>sp&#10;</xsl:text>
<xsl:apply-templates/>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="callout">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB',
substring-after(@arearefs,'-'),
'. ',$git.docbook.backslash,'fR')"/>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>br&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,17 @@
<!-- manpage-bold-literal.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- render literal text as bold (instead of plain or monospace);
this makes literal text easier to distinguish in manpages
viewed on a tty -->
<xsl:template match="literal">
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fB</xsl:text>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fR</xsl:text>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,13 @@
<!-- manpage-normal.xsl:
special settings for manpages rendered from asciidoc+docbook
handles anything we want to keep away from docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the normal values for the roff control characters -->
<xsl:param name="git.docbook.backslash">\</xsl:param>
<xsl:param name="git.docbook.dot" >.</xsl:param>
</xsl:stylesheet>

View File

@ -0,0 +1,21 @@
<!-- manpage-suppress-sp.xsl:
special settings for manpages rendered from asciidoc+docbook
handles erroneous, inline .sp in manpage output of some
versions of docbook-xsl -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- attempt to work around spurious .sp at the tail of the line
that some versions of docbook stylesheets seem to add -->
<xsl:template match="simpara">
<xsl:variable name="content">
<xsl:apply-templates/>
</xsl:variable>
<xsl:value-of select="normalize-space($content)"/>
<xsl:if test="not(ancestor::authorblurb) and
not(ancestor::personblurb)">
<xsl:text>&#10;&#10;</xsl:text>
</xsl:if>
</xsl:template>
</xsl:stylesheet>

View File

@ -60,7 +60,7 @@ else
endif
INCLUDES = \
-I$(srctree)/tools/perf/lib/include \
-I$(srctree)/tools/lib/perf/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
@ -181,7 +181,10 @@ install_pkgconfig: $(LIBPERF_PC)
$(call QUIET_INSTALL, $(LIBPERF_PC)) \
$(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644)
install: install_lib install_headers install_pkgconfig
install_doc:
$(Q)$(MAKE) -C Documentation install-man install-html install-examples
install: install_lib install_headers install_pkgconfig install_doc
FORCE:

View File

@ -164,6 +164,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
evlist->threads = perf_thread_map__get(threads);
}
if (!evlist->all_cpus && cpus)
evlist->all_cpus = perf_cpu_map__get(cpus);
perf_evlist__propagate_maps(evlist);
}

View File

@ -16,7 +16,7 @@ all:
include $(srctree)/tools/scripts/Makefile.include
INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
INCLUDE = -I$(srctree)/tools/lib/perf/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
$(TESTS_A): FORCE
$(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI)

View File

@ -96,6 +96,10 @@ int strtobool(const char *s, bool *res)
* If libc has strlcpy() then that version will override this
* implementation:
*/
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#endif
size_t __weak strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
@ -107,6 +111,9 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
}
return ret;
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif
/**
* skip_spaces - Removes leading whitespace from @str.

View File

@ -112,6 +112,12 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
--prefix=PREFIX::
--prefix-strip=N::
Remove first N entries from source file path names in executables
and add PREFIX. This allows to display source code compiled on systems
with different file system layout.
--skip-missing::
Skip symbols that cannot be annotated.

View File

@ -367,6 +367,12 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
--prefix=PREFIX::
--prefix-strip=N::
Remove first N entries from source file path names in executables
and add PREFIX. This allows to display source code compiled on systems
with different file system layout.
--group::
Show event group information together. It forces group output also
if there are no groups defined in data file.

View File

@ -110,6 +110,10 @@ OPTIONS for 'perf sched timehist'
--max-stack::
Maximum number of functions to display in backtrace, default 5.
-C=::
--cpu=::
Only show events for the given CPU(s) (comma separated list).
-p=::
--pid=::
Only show events for given process ID (comma separated list).

View File

@ -158,6 +158,12 @@ Default is to monitor all CPUS.
-M::
--disassembler-style=:: Set disassembler style for objdump.
--prefix=PREFIX::
--prefix-strip=N::
Remove first N entries from source file path names in executables
and add PREFIX. This allows to display source code compiled on systems
with different file system layout.
--source::
Interleave source code with assembly code. Enabled by default,
disable with --no-source.

View File

@ -7,6 +7,7 @@ tools/lib/traceevent
tools/lib/api
tools/lib/bpf
tools/lib/subcmd
tools/lib/perf
tools/lib/argv_split.c
tools/lib/ctype.c
tools/lib/hweight.c

View File

@ -286,7 +286,7 @@ ifeq ($(DEBUG),0)
endif
endif
INC_FLAGS += -I$(src-perf)/lib/include
INC_FLAGS += -I$(srctree)/tools/lib/perf/include
INC_FLAGS += -I$(src-perf)/util/include
INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/

View File

@ -230,7 +230,7 @@ LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
LIBPERF_DIR = $(srctree)/tools/perf/lib/
LIBPERF_DIR = $(srctree)/tools/lib/perf/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for

Some files were not shown because too many files have changed in this diff Show More