mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-04-20 11:07:48 +07:00
perf: Fix LLC-* events on Intel Nehalem/Westmere
On Intel Nehalem and Westmere CPUs, the generic perf LLC-* events count the L2 cache rather than the real L3 LLC, which is inconsistent with the behavior on other CPUs.

Fixing this requires the special OFFCORE_RESPONSE events, which need a separate mask register. The previous patch implemented that infrastructure; this patch uses it to set the correct events for the LLC-* events on Nehalem and Westmere.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-3-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
a7e3ed1e47
commit
e994d7d23a
@ -310,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids
|
|||||||
[PERF_COUNT_HW_CACHE_MAX]
|
[PERF_COUNT_HW_CACHE_MAX]
|
||||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||||
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
||||||
|
static u64 __read_mostly hw_cache_extra_regs
|
||||||
|
[PERF_COUNT_HW_CACHE_MAX]
|
||||||
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||||
|
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Propagate event elapsed time into the generic event.
|
* Propagate event elapsed time into the generic event.
|
||||||
@ -524,8 +528,9 @@ static inline int x86_pmu_initialized(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
|
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
|
||||||
{
|
{
|
||||||
|
struct perf_event_attr *attr = &event->attr;
|
||||||
unsigned int cache_type, cache_op, cache_result;
|
unsigned int cache_type, cache_op, cache_result;
|
||||||
u64 config, val;
|
u64 config, val;
|
||||||
|
|
||||||
@ -552,8 +557,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
hwc->config |= val;
|
hwc->config |= val;
|
||||||
|
attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
|
||||||
return 0;
|
return x86_pmu_extra_regs(val, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int x86_setup_perfctr(struct perf_event *event)
|
static int x86_setup_perfctr(struct perf_event *event)
|
||||||
@ -578,10 +583,10 @@ static int x86_setup_perfctr(struct perf_event *event)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (attr->type == PERF_TYPE_RAW)
|
if (attr->type == PERF_TYPE_RAW)
|
||||||
return 0;
|
return x86_pmu_extra_regs(event->attr.config, event);
|
||||||
|
|
||||||
if (attr->type == PERF_TYPE_HW_CACHE)
|
if (attr->type == PERF_TYPE_HW_CACHE)
|
||||||
return set_ext_hw_attr(hwc, attr);
|
return set_ext_hw_attr(hwc, event);
|
||||||
|
|
||||||
if (attr->config >= x86_pmu.max_events)
|
if (attr->config >= x86_pmu.max_events)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -285,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
|
|||||||
},
|
},
|
||||||
[ C(LL ) ] = {
|
[ C(LL ) ] = {
|
||||||
[ C(OP_READ) ] = {
|
[ C(OP_READ) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
|
/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
|
||||||
[ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
|
[ C(RESULT_ACCESS) ] = 0x01b7,
|
||||||
|
/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
|
||||||
|
[ C(RESULT_MISS) ] = 0x01bb,
|
||||||
},
|
},
|
||||||
|
/*
|
||||||
|
* Use RFO, not WRITEBACK, because a write miss would typically occur
|
||||||
|
* on RFO.
|
||||||
|
*/
|
||||||
[ C(OP_WRITE) ] = {
|
[ C(OP_WRITE) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
|
/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
|
||||||
[ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
|
[ C(RESULT_ACCESS) ] = 0x01bb,
|
||||||
|
/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
|
||||||
|
[ C(RESULT_MISS) ] = 0x01b7,
|
||||||
},
|
},
|
||||||
[ C(OP_PREFETCH) ] = {
|
[ C(OP_PREFETCH) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
|
/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
|
||||||
[ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
|
[ C(RESULT_ACCESS) ] = 0x01b7,
|
||||||
|
/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
|
||||||
|
[ C(RESULT_MISS) ] = 0x01bb,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
[ C(DTLB) ] = {
|
[ C(DTLB) ] = {
|
||||||
@ -341,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define DMND_DATA_RD (1 << 0)
|
||||||
|
#define DMND_RFO (1 << 1)
|
||||||
|
#define DMND_WB (1 << 3)
|
||||||
|
#define PF_DATA_RD (1 << 4)
|
||||||
|
#define PF_DATA_RFO (1 << 5)
|
||||||
|
#define RESP_UNCORE_HIT (1 << 8)
|
||||||
|
#define RESP_MISS (0xf600) /* non uncore hit */
|
||||||
|
|
||||||
|
static __initconst const u64 nehalem_hw_cache_extra_regs
|
||||||
|
[PERF_COUNT_HW_CACHE_MAX]
|
||||||
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||||
|
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||||
|
{
|
||||||
|
[ C(LL ) ] = {
|
||||||
|
[ C(OP_READ) ] = {
|
||||||
|
[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
|
||||||
|
[ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
|
||||||
|
},
|
||||||
|
[ C(OP_WRITE) ] = {
|
||||||
|
[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
|
||||||
|
[ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
|
||||||
|
},
|
||||||
|
[ C(OP_PREFETCH) ] = {
|
||||||
|
[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
|
||||||
|
[ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
static __initconst const u64 nehalem_hw_cache_event_ids
|
static __initconst const u64 nehalem_hw_cache_event_ids
|
||||||
[PERF_COUNT_HW_CACHE_MAX]
|
[PERF_COUNT_HW_CACHE_MAX]
|
||||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||||
@ -376,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
|
|||||||
},
|
},
|
||||||
[ C(LL ) ] = {
|
[ C(LL ) ] = {
|
||||||
[ C(OP_READ) ] = {
|
[ C(OP_READ) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
|
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
|
||||||
[ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
|
[ C(RESULT_ACCESS) ] = 0x01b7,
|
||||||
|
/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
|
||||||
|
[ C(RESULT_MISS) ] = 0x01b7,
|
||||||
},
|
},
|
||||||
|
/*
|
||||||
|
* Use RFO, not WRITEBACK, because a write miss would typically occur
|
||||||
|
* on RFO.
|
||||||
|
*/
|
||||||
[ C(OP_WRITE) ] = {
|
[ C(OP_WRITE) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
|
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
|
||||||
[ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
|
[ C(RESULT_ACCESS) ] = 0x01b7,
|
||||||
|
/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
|
||||||
|
[ C(RESULT_MISS) ] = 0x01b7,
|
||||||
},
|
},
|
||||||
[ C(OP_PREFETCH) ] = {
|
[ C(OP_PREFETCH) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
|
/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
|
||||||
[ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
|
[ C(RESULT_ACCESS) ] = 0x01b7,
|
||||||
|
/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
|
||||||
|
[ C(RESULT_MISS) ] = 0x01b7,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
[ C(DTLB) ] = {
|
[ C(DTLB) ] = {
|
||||||
@ -1340,6 +1393,8 @@ static __init int intel_pmu_init(void)
|
|||||||
case 46: /* 45 nm nehalem-ex, "Beckton" */
|
case 46: /* 45 nm nehalem-ex, "Beckton" */
|
||||||
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
|
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
|
||||||
sizeof(hw_cache_event_ids));
|
sizeof(hw_cache_event_ids));
|
||||||
|
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
|
||||||
|
sizeof(hw_cache_extra_regs));
|
||||||
|
|
||||||
intel_pmu_lbr_init_nhm();
|
intel_pmu_lbr_init_nhm();
|
||||||
|
|
||||||
@ -1366,6 +1421,8 @@ static __init int intel_pmu_init(void)
|
|||||||
case 44: /* 32 nm nehalem, "Gulftown" */
|
case 44: /* 32 nm nehalem, "Gulftown" */
|
||||||
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
|
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
|
||||||
sizeof(hw_cache_event_ids));
|
sizeof(hw_cache_event_ids));
|
||||||
|
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
|
||||||
|
sizeof(hw_cache_extra_regs));
|
||||||
|
|
||||||
intel_pmu_lbr_init_nhm();
|
intel_pmu_lbr_init_nhm();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user