mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 19:46:56 +07:00
148519120c
Revert commit69a37bea
(cpuidle: Quickly notice prediction failure for repeat mode), because it has been identified as the source of a significant performance regression in v3.8 and later as explained by Jeremy Eder: We believe we've identified a particular commit to the cpuidle code that seems to be impacting performance of variety of workloads. The simplest way to reproduce is using netperf TCP_RR test, so we're using that, on a pair of Sandy Bridge based servers. We also have data from a large database setup where performance is also measurably/positively impacted, though that test data isn't easily share-able. Included below are test results from 3 test kernels: kernel reverts ----------------------------------------------------------- 1) vanilla upstream (no reverts) 2) perfteam2 revertse11538d1f0
3) test reverts69a37beabf
e11538d1f0
In summary, netperf TCP_RR numbers improve by approximately 4% after reverting69a37beabf
. When69a37beabf
is included, C0 residency never seems to get above 40%. Taking that patch out gets C0 near 100% quite often, and performance increases. The below data are histograms representing the %c0 residency @ 1-second sample rates (using turbostat), while under netperf test. - If you look at the first 4 histograms, you can see %c0 residency almost entirely in the 30,40% bin. - The last pair, which reverts69a37beabf
, shows %c0 in the 80,90,100% bins. Below each kernel name are netperf TCP_RR trans/s numbers for the particular kernel that can be disclosed publicly, comparing the 3 test kernels. We ran a 4th test with the vanilla kernel where we've also set /dev/cpu_dma_latency=0 to show overall impact boosting single-threaded TCP_RR performance over 11% above baseline. 3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0): TCP_RR trans/s 54323.78 ----------------------------------------------------------- 3.10-rc2 vanilla RX (no reverts) TCP_RR trans/s 48192.47 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 1]: * 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 49]: ************************************************* 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 perfteam2 RX (reverts commite11538d1f0
) TCP_RR trans/s 49698.69 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 2]: ** 40.0000 - 50.0000 [ 58]: ********************************************************** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 test RX (reverts69a37beabf
ande11538d1f0
) TCP_RR trans/s 47766.95 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 27]: *************************** 40.0000 - 50.0000 [ 2]: ** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 2]: ** 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 28]: **************************** Sender: 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 1]: * 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 3]: *** 80.0000 - 90.0000 [ 7]: ******* 90.0000 - 100.0000 [ 38]: ************************************** These results demonstrate gaining back the tendency of the CPU to stay in more responsive, performant C-states (and thus yield measurably better performance), by reverting commit69a37beabf
. Requested-by: Jeremy Eder <jeder@redhat.com> Tested-by: Len Brown <len.brown@intel.com> Cc: 3.8+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
178 lines
5.5 KiB
C
178 lines
5.5 KiB
C
/* linux/include/linux/tick.h
|
|
*
|
|
* This file contains the structure definitions for tick related functions
|
|
*
|
|
*/
|
|
#ifndef _LINUX_TICK_H
|
|
#define _LINUX_TICK_H
|
|
|
|
#include <linux/clockchips.h>
|
|
#include <linux/irqflags.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/hrtimer.h>
|
|
|
|
#ifdef CONFIG_GENERIC_CLOCKEVENTS
|
|
|
|
enum tick_device_mode {
|
|
TICKDEV_MODE_PERIODIC,
|
|
TICKDEV_MODE_ONESHOT,
|
|
};
|
|
|
|
struct tick_device {
|
|
struct clock_event_device *evtdev;
|
|
enum tick_device_mode mode;
|
|
};
|
|
|
|
enum tick_nohz_mode {
|
|
NOHZ_MODE_INACTIVE,
|
|
NOHZ_MODE_LOWRES,
|
|
NOHZ_MODE_HIGHRES,
|
|
};
|
|
|
|
/**
|
|
* struct tick_sched - sched tick emulation and no idle tick control/stats
|
|
* @sched_timer: hrtimer to schedule the periodic tick in high
|
|
* resolution mode
|
|
* @last_tick: Store the last tick expiry time when the tick
|
|
* timer is modified for nohz sleeps. This is necessary
|
|
* to resume the tick timer operation in the timeline
|
|
* when the CPU returns from nohz sleep.
|
|
* @tick_stopped: Indicator that the idle tick has been stopped
|
|
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
|
|
* @idle_calls: Total number of idle calls
|
|
* @idle_sleeps: Number of idle calls, where the sched tick was stopped
|
|
* @idle_entrytime: Time when the idle call was entered
|
|
* @idle_waketime: Time when the idle was interrupted
|
|
* @idle_exittime: Time when the idle state was left
|
|
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
|
|
* @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
|
|
* @sleep_length: Duration of the current idle sleep
|
|
* @do_timer_lst: CPU was the last one doing do_timer before going idle
|
|
*/
|
|
struct tick_sched {
|
|
struct hrtimer sched_timer;
|
|
unsigned long check_clocks;
|
|
enum tick_nohz_mode nohz_mode;
|
|
ktime_t last_tick;
|
|
int inidle;
|
|
int tick_stopped;
|
|
unsigned long idle_jiffies;
|
|
unsigned long idle_calls;
|
|
unsigned long idle_sleeps;
|
|
int idle_active;
|
|
ktime_t idle_entrytime;
|
|
ktime_t idle_waketime;
|
|
ktime_t idle_exittime;
|
|
ktime_t idle_sleeptime;
|
|
ktime_t iowait_sleeptime;
|
|
ktime_t sleep_length;
|
|
unsigned long last_jiffies;
|
|
unsigned long next_jiffies;
|
|
ktime_t idle_expires;
|
|
int do_timer_last;
|
|
};
|
|
|
|
extern void __init tick_init(void);
|
|
extern int tick_is_oneshot_available(void);
|
|
extern struct tick_device *tick_get_device(int cpu);
|
|
|
|
# ifdef CONFIG_HIGH_RES_TIMERS
|
|
extern int tick_init_highres(void);
|
|
extern int tick_program_event(ktime_t expires, int force);
|
|
extern void tick_setup_sched_timer(void);
|
|
# endif
|
|
|
|
# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
|
|
extern void tick_cancel_sched_timer(int cpu);
|
|
# else
|
|
static inline void tick_cancel_sched_timer(int cpu) { }
|
|
# endif
|
|
|
|
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
|
extern struct tick_device *tick_get_broadcast_device(void);
|
|
extern struct cpumask *tick_get_broadcast_mask(void);
|
|
|
|
# ifdef CONFIG_TICK_ONESHOT
|
|
extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
|
|
# endif
|
|
|
|
# endif /* BROADCAST */
|
|
|
|
# ifdef CONFIG_TICK_ONESHOT
|
|
extern void tick_clock_notify(void);
|
|
extern int tick_check_oneshot_change(int allow_nohz);
|
|
extern struct tick_sched *tick_get_tick_sched(int cpu);
|
|
extern void tick_check_idle(int cpu);
|
|
extern int tick_oneshot_mode_active(void);
|
|
# ifndef arch_needs_cpu
|
|
# define arch_needs_cpu(cpu) (0)
|
|
# endif
|
|
# else
|
|
static inline void tick_clock_notify(void) { }
|
|
static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
|
|
static inline void tick_check_idle(int cpu) { }
|
|
static inline int tick_oneshot_mode_active(void) { return 0; }
|
|
# endif
|
|
|
|
#else /* CONFIG_GENERIC_CLOCKEVENTS */
|
|
static inline void tick_init(void) { }
|
|
static inline void tick_cancel_sched_timer(int cpu) { }
|
|
static inline void tick_clock_notify(void) { }
|
|
static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
|
|
static inline void tick_check_idle(int cpu) { }
|
|
static inline int tick_oneshot_mode_active(void) { return 0; }
|
|
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
|
|
|
|
# ifdef CONFIG_NO_HZ_COMMON
|
|
DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
|
|
|
|
static inline int tick_nohz_tick_stopped(void)
|
|
{
|
|
return __this_cpu_read(tick_cpu_sched.tick_stopped);
|
|
}
|
|
|
|
extern void tick_nohz_idle_enter(void);
|
|
extern void tick_nohz_idle_exit(void);
|
|
extern void tick_nohz_irq_exit(void);
|
|
extern ktime_t tick_nohz_get_sleep_length(void);
|
|
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
|
|
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
|
|
|
|
# else /* !CONFIG_NO_HZ_COMMON */
|
|
static inline int tick_nohz_tick_stopped(void)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void tick_nohz_idle_enter(void) { }
|
|
static inline void tick_nohz_idle_exit(void) { }
|
|
|
|
static inline ktime_t tick_nohz_get_sleep_length(void)
|
|
{
|
|
ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
|
|
|
|
return len;
|
|
}
|
|
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
|
|
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
|
|
# endif /* !CONFIG_NO_HZ_COMMON */
|
|
|
|
#ifdef CONFIG_NO_HZ_FULL
|
|
extern void tick_nohz_init(void);
|
|
extern int tick_nohz_full_cpu(int cpu);
|
|
extern void tick_nohz_full_check(void);
|
|
extern void tick_nohz_full_kick(void);
|
|
extern void tick_nohz_full_kick_all(void);
|
|
extern void tick_nohz_task_switch(struct task_struct *tsk);
|
|
#else
|
|
static inline void tick_nohz_init(void) { }
|
|
static inline int tick_nohz_full_cpu(int cpu) { return 0; }
|
|
static inline void tick_nohz_full_check(void) { }
|
|
static inline void tick_nohz_full_kick(void) { }
|
|
static inline void tick_nohz_full_kick_all(void) { }
|
|
static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
|
|
#endif
|
|
|
|
|
|
#endif
|