mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-22 19:49:18 +07:00
perf: Add generic memory sampling interface
This patch adds PERF_SAMPLE_DATA_SRC, a sample type that collects the data source of a sample, i.e., where the data associated with the sampled instruction came from. The information is stored in a perf_mem_data_src union. It contains the opcode type, memory hierarchy level, TLB access, snoop mode, and lock information, subject to availability in hardware.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-8-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
c3feedf2aa
commit
d6be9ad6c9
@ -568,6 +568,7 @@ struct perf_sample_data {
|
||||
u32 reserved;
|
||||
} cpu_entry;
|
||||
u64 period;
|
||||
union perf_mem_data_src data_src;
|
||||
struct perf_callchain_entry *callchain;
|
||||
struct perf_raw_record *raw;
|
||||
struct perf_branch_stack *br_stack;
|
||||
@ -588,6 +589,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
data->regs_user.regs = NULL;
|
||||
data->stack_user_size = 0;
|
||||
data->weight = 0;
|
||||
data->data_src.val = 0;
|
||||
}
|
||||
|
||||
extern void perf_output_sample(struct perf_output_handle *handle,
|
||||
|
@ -133,9 +133,9 @@ enum perf_event_sample_format {
|
||||
PERF_SAMPLE_REGS_USER = 1U << 12,
|
||||
PERF_SAMPLE_STACK_USER = 1U << 13,
|
||||
PERF_SAMPLE_WEIGHT = 1U << 14,
|
||||
PERF_SAMPLE_DATA_SRC = 1U << 15,
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 15, /* non-ABI */
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -592,6 +592,7 @@ enum perf_event_type {
|
||||
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
|
||||
*
|
||||
* { u64 weight; } && PERF_SAMPLE_WEIGHT
|
||||
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_SAMPLE = 9,
|
||||
@ -617,4 +618,67 @@ enum perf_callchain_context {
|
||||
#define PERF_FLAG_FD_OUTPUT (1U << 1)
|
||||
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
|
||||
|
||||
union perf_mem_data_src {
|
||||
__u64 val;
|
||||
struct {
|
||||
__u64 mem_op:5, /* type of opcode */
|
||||
mem_lvl:14, /* memory hierarchy level */
|
||||
mem_snoop:5, /* snoop mode */
|
||||
mem_lock:2, /* lock instr */
|
||||
mem_dtlb:7, /* tlb access */
|
||||
mem_rsvd:31;
|
||||
};
|
||||
};
|
||||
|
||||
/*
 * Field encodings for union perf_mem_data_src.
 *
 * Each group lists the flag bits of one field and ends with a
 * PERF_MEM_<field>_SHIFT constant: the bit offset of that field inside
 * the 64-bit data_src value.  Flags within a group are distinct bits, so
 * several may be OR-ed together (for example LVL_HIT together with LVL_L1).
 */

/* type of opcode (load/store/prefetch/code) */
#define PERF_MEM_OP_NA		0x01 /* not available */
#define PERF_MEM_OP_LOAD	0x02 /* load instruction */
#define PERF_MEM_OP_STORE	0x04 /* store instruction */
#define PERF_MEM_OP_PFETCH	0x08 /* prefetch */
#define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
#define PERF_MEM_OP_SHIFT	0

/* memory hierarchy (memory level, hit or miss) */
#define PERF_MEM_LVL_NA		0x01 /* not available */
#define PERF_MEM_LVL_HIT	0x02 /* hit level */
#define PERF_MEM_LVL_MISS	0x04 /* miss level */
#define PERF_MEM_LVL_L1		0x08 /* L1 */
#define PERF_MEM_LVL_LFB	0x10 /* Line Fill Buffer */
#define PERF_MEM_LVL_L2		0x20 /* L2 */
#define PERF_MEM_LVL_L3		0x40 /* L3 */
#define PERF_MEM_LVL_LOC_RAM	0x80 /* Local DRAM */
#define PERF_MEM_LVL_REM_RAM1	0x100 /* Remote DRAM (1 hop) */
#define PERF_MEM_LVL_REM_RAM2	0x200 /* Remote DRAM (2 hops) */
#define PERF_MEM_LVL_REM_CCE1	0x400 /* Remote Cache (1 hop) */
#define PERF_MEM_LVL_REM_CCE2	0x800 /* Remote Cache (2 hops) */
#define PERF_MEM_LVL_IO		0x1000 /* I/O memory */
#define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT	5

/* snoop mode */
#define PERF_MEM_SNOOP_NA	0x01 /* not available */
#define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
#define PERF_MEM_SNOOP_HIT	0x04 /* snoop hit */
#define PERF_MEM_SNOOP_MISS	0x08 /* snoop miss */
#define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT	19

/* locked instruction */
#define PERF_MEM_LOCK_NA	0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
#define PERF_MEM_LOCK_SHIFT	24

/* TLB access */
#define PERF_MEM_TLB_NA		0x01 /* not available */
#define PERF_MEM_TLB_HIT	0x02 /* hit level */
#define PERF_MEM_TLB_MISS	0x04 /* miss level */
#define PERF_MEM_TLB_L1		0x08 /* L1 */
#define PERF_MEM_TLB_L2		0x10 /* L2 */
#define PERF_MEM_TLB_WK		0x20 /* Hardware Walker */
#define PERF_MEM_TLB_OS		0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT	26

/*
 * PERF_MEM_S(a, s) - data_src bits for flag 's' of field 'a'.
 *
 * Token-pastes the arguments into PERF_MEM_<a>_<s>, widens the flag to
 * 64 bits and shifts it left by PERF_MEM_<a>_SHIFT, e.g.
 * PERF_MEM_S(LVL, L1) == 0x08 << 5.
 *
 * The widening has to happen before the shift: PERF_MEM_S(TLB, OS) is
 * bit 32 and would not fit in an int.
 *
 * The cast uses __u64, the same type union perf_mem_data_src uses,
 * because this is a UAPI header and the kernel-internal 'u64' typedef is
 * not available to userspace includers.
 */
#define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
|
||||
|
||||
#endif /* _UAPI_LINUX_PERF_EVENT_H */
|
||||
|
@ -982,6 +982,9 @@ static void perf_event__header_size(struct perf_event *event)
|
||||
if (sample_type & PERF_SAMPLE_READ)
|
||||
size += event->read_size;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
size += sizeof(data->data_src.val);
|
||||
|
||||
event->header_size = size;
|
||||
}
|
||||
|
||||
@ -4199,6 +4202,9 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
perf_output_put(handle, data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
perf_output_put(handle, data->data_src.val);
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
|
Loading…
Reference in New Issue
Block a user