mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-27 10:25:05 +07:00
a35d16905e
Recently a discussion about stability and performance of a system involving a high rate of kfree_rcu() calls surfaced on the list [1], which led to another discussion about how to prepare for this situation. This patch adds basic batching support for kfree_rcu(). It is "basic" because we do none of the slab management, dynamic allocation, code moving or any of the other things, some of which previous attempts did [2]. These fancier improvements can be follow-up patches, and there are different ideas being discussed in those regards. This is an effort to start simple and build up from there. In the future, an extension to use kfree_bulk and possibly per-slab batching could be done to further improve performance due to cache locality and slab-specific bulk-free optimizations. By using an array of pointers, the worker thread processing the work would need to read less data, since it no longer needs to deal with large rcu_head(s). Torture tests follow in the next patch and show around a 5x reduction in the number of grace periods on a 16-CPU system. More details and test data are in that patch. There is an implication for rcu_barrier() with this patch. Since the kfree_rcu() calls can be batched, and may not yet have been handed to the RCU machinery (in fact, the monitor may not even have run yet to do the queue_rcu_work()), there seems to be no easy way of implementing rcu_barrier() to wait for those kfree_rcu()s that have already been made. So this means a kfree_rcu() followed by an rcu_barrier() does not imply that memory will be freed once rcu_barrier() returns. Another implication is higher active memory usage (although not runaway) until the kfree_rcu() flooding ends, compared to without batching. More details about this are in the second patch, which adds an rcuperf test. Finally, in the near future we will get rid of kfree_rcu() special casing within RCU, such as in rcu_do_batch, and switch everything to just batching.
Currently we don't do that since the timer subsystem is not yet up and we cannot schedule the kfree_rcu() monitor as the timer subsystem's locks are not initialized. That would also mean getting rid of kfree_call_rcu_nobatch() entirely. [1] http://lore.kernel.org/lkml/20190723035725-mutt-send-email-mst@kernel.org [2] https://lkml.org/lkml/2017/12/19/824 Cc: kernel-team@android.com Cc: kernel-team@lge.com Co-developed-by: Byungchul Park <byungchul.park@lge.com> Signed-off-by: Byungchul Park <byungchul.park@lge.com> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> [ paulmck: Applied 0day and Paul Walmsley feedback on ->monitor_todo. ] [ paulmck: Make it work during early boot. ] [ paulmck: Add a crude early boot self-test. ] [ paulmck: Style adjustments and experimental docbook structure header. ] Link: https://lore.kernel.org/lkml/alpine.DEB.2.21.9999.1908161931110.32497@viisi.sifive.com/T/#me9956f66cb611b95d26ae92700e1d901f46e8c59 Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
107 lines
2.8 KiB
C
107 lines
2.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0+ */
|
|
/*
|
|
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
|
|
*
|
|
* Copyright IBM Corporation, 2008
|
|
*
|
|
* Author: Paul E. McKenney <paulmck@linux.ibm.com>
|
|
*
|
|
* For detailed explanation of Read-Copy Update mechanism see -
|
|
* Documentation/RCU
|
|
*/
|
|
#ifndef __LINUX_TINY_H
|
|
#define __LINUX_TINY_H
|
|
|
|
#include <asm/param.h> /* for HZ */
|
|
|
|
/* Never flag non-existent other CPUs! */
|
|
static inline bool rcu_eqs_special_set(int cpu)
{
	/* @cpu is ignored: the answer is unconditionally false. */
	return false;
}
|
|
|
|
/*
 * get_state_synchronize_rcu - return a grace-period state cookie.
 *
 * This implementation always returns 0.
 */
static inline unsigned long get_state_synchronize_rcu(void)
{
	return 0;
}
|
|
|
|
/*
 * cond_synchronize_rcu - conditional grace-period wait.
 * @oldstate: state cookie; not examined by this implementation.
 *
 * The only action taken here is the might_sleep() annotation.
 */
static inline void cond_synchronize_rcu(unsigned long oldstate)
{
	might_sleep();
}
|
|
|
|
/* Declaration only; rcu_barrier() is defined outside this header. */
extern void rcu_barrier(void);
|
|
|
|
/*
 * synchronize_rcu_expedited - expedited grace-period wait.
 *
 * Here this is simply a call to synchronize_rcu().
 */
static inline void synchronize_rcu_expedited(void)
{
	synchronize_rcu();
}
|
|
|
|
/*
 * kfree_call_rcu - queue a kfree_rcu() callback.
 * @head: rcu_head to queue.
 * @func: callback passed along with @head.
 *
 * Forwards both arguments unchanged to call_rcu().
 */
static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
	call_rcu(head, func);
}
|
|
|
|
/*
 * kfree_call_rcu_nobatch - queue a kfree_rcu() callback without batching.
 * @head: rcu_head to queue.
 * @func: callback passed along with @head.
 *
 * Forwards both arguments unchanged to call_rcu().
 */
static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
{
	call_rcu(head, func);
}
|
|
|
|
/* Declaration only; rcu_qs() is defined outside this header. */
void rcu_qs(void);
|
|
|
|
/*
 * rcu_softirq_qs - softirq-context wrapper that simply calls rcu_qs().
 */
static inline void rcu_softirq_qs(void)
{
	rcu_qs();
}
|
|
|
|
/*
 * rcu_note_context_switch - calls rcu_qs() and then rcu_tasks_qs(current).
 * The @preempt argument is accepted but not used by the expansion.
 */
#define rcu_note_context_switch(preempt) \
	do { \
		rcu_qs(); \
		rcu_tasks_qs(current); \
	} while (0)
|
|
|
|
/*
 * rcu_needs_cpu - report whether RCU needs this CPU.
 * @basemono: not used by this implementation.
 * @nextevt: output; always set to KTIME_MAX.
 *
 * Returns 0 unconditionally.
 */
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	*nextevt = KTIME_MAX;
	return 0;
}
|
|
|
|
/*
|
|
* Take advantage of the fact that there is only one CPU, which
|
|
* allows us to ignore virtualization-based context switches.
|
|
*/
|
|
static inline void rcu_virt_note_context_switch(int cpu)
{
	/* Intentionally empty; @cpu is ignored. */
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_cpu_stall_reset(void)
{
}
|
|
/* Returns a fixed value of 21 * HZ. */
static inline int rcu_jiffies_till_stall_check(void)
{
	return 21 * HZ;
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_idle_enter(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_idle_exit(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_irq_enter(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_irq_exit_irqson(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_irq_enter_irqson(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_irq_exit(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void exit_rcu(void)
{
}
|
|
/*
 * rcu_preempt_need_deferred_qs - does @t need a deferred quiescent state?
 * @t: task to query; not examined by this implementation.
 *
 * Returns false unconditionally.
 */
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return false;
}
|
|
static inline void rcu_preempt_deferred_qs(struct task_struct *t)
{
	/* Intentionally empty; @t is ignored. */
}
|
|
/*
 * rcu_scheduler_starting() is an out-of-line function when CONFIG_SRCU is
 * set; otherwise it is an empty inline stub.
 */
#ifdef CONFIG_SRCU
void rcu_scheduler_starting(void);
#else /* #ifdef CONFIG_SRCU */
static inline void rcu_scheduler_starting(void) { }
#endif /* #else #ifdef CONFIG_SRCU */
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_end_inkernel_boot(void)
{
}
|
|
/* Reports true unconditionally. */
static inline bool rcu_is_watching(void)
{
	return true;
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void rcu_momentary_dyntick_idle(void)
{
}
|
|
/* No-op stub: the body is intentionally empty. */
static inline void kfree_rcu_scheduler_running(void)
{
}
|
|
|
|
/* Avoid RCU read-side critical sections leaking across. */
|
|
static inline void rcu_all_qs(void)
{
	/* The only action is the barrier() call. */
	barrier();
}
|
|
|
|
/*
 * RCUtree hotplug events.
 *
 * Each of these callback names is defined as NULL in this header.
 */
#define rcutree_prepare_cpu	NULL
#define rcutree_online_cpu	NULL
#define rcutree_offline_cpu	NULL
#define rcutree_dead_cpu	NULL
#define rcutree_dying_cpu	NULL
|
|
static inline void rcu_cpu_starting(unsigned int cpu)
{
	/* Intentionally empty; @cpu is ignored. */
}
|
|
|
|
#endif /* __LINUX_TINY_H */
|