mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-16 10:06:42 +07:00
69842cba9a
Utilization clamping allows clamping the CPU's utilization within a [util_min, util_max] range, depending on the set of RUNNABLE tasks on that CPU. Each task references two "clamp buckets" defining its minimum and maximum (util_{min,max}) utilization "clamp values". A CPU's clamp bucket is active if there is at least one RUNNABLE task enqueued on that CPU and refcounting that bucket. When a task is {en,de}queued {on,from} a rq, the set of active clamp buckets on that CPU can change. If the set of active clamp buckets changes for a CPU, a new "aggregated" clamp value is computed for that CPU. This is because each clamp bucket enforces a different utilization clamp value. Clamp values are always MAX aggregated for both util_min and util_max. This ensures that no task can affect the performance of other co-scheduled tasks which are more boosted (i.e. with higher util_min clamp) or less capped (i.e. with higher util_max clamp). A task has: task_struct::uclamp[clamp_id]::bucket_id to track the "bucket index" of the CPU's clamp bucket it refcounts while enqueued, for each clamp index (clamp_id). A runqueue has: rq::uclamp[clamp_id]::bucket[bucket_id].tasks to track how many RUNNABLE tasks on that CPU refcount each clamp bucket (bucket_id) of a clamp index (clamp_id). It also has: rq::uclamp[clamp_id]::bucket[bucket_id].value to track the clamp value of each clamp bucket (bucket_id) of a clamp index (clamp_id). The rq::uclamp::bucket[clamp_id][] array is scanned every time a new MAX aggregated clamp value needs to be found for a clamp_id. This operation is required only when the last task of a clamp bucket tracking the current MAX aggregated clamp value is dequeued. In this case, the CPU is either entering IDLE or going to schedule a less boosted or more clamped task. The expected number of different clamp values configured at build time is small enough to fit the full unordered array into a single cache line, for configurations of up to 7 buckets.
Add to struct rq the basic data structures required to refcount the number of RUNNABLE tasks for each clamp bucket. Add also the max aggregation required to update the rq's clamp value at each enqueue/dequeue event. Use a simple linear mapping of clamp values into clamp buckets. Pre-compute and cache bucket_id to avoid integer divisions at enqueue/dequeue time. Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alessio Balsini <balsini@android.com> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> Cc: Joel Fernandes <joelaf@google.com> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Morten Rasmussen <morten.rasmussen@arm.com> Cc: Paul Turner <pjt@google.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Quentin Perret <quentin.perret@arm.com> Cc: Rafael J . Wysocki <rafael.j.wysocki@intel.com> Cc: Steve Muckle <smuckle@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Todd Kjos <tkjos@google.com> Cc: Vincent Guittot <vincent.guittot@linaro.org> Cc: Viresh Kumar <viresh.kumar@linaro.org> Link: https://lkml.kernel.org/r/20190621084217.8167-2-patrick.bellasi@arm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
258 lines
6.2 KiB
C
258 lines
6.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/* Integer base 2 logarithm calculation
|
|
*
|
|
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*/
|
|
|
|
#ifndef _LINUX_LOG2_H
|
|
#define _LINUX_LOG2_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/bitops.h>
|
|
|
|
/*
|
|
* non-constant log of base 2 calculators
|
|
* - the arch may override these in asm/bitops.h if they can be implemented
|
|
* more efficiently than using fls() and fls64()
|
|
* - the arch is not required to handle n==0 if implementing the fallback
|
|
*/
|
|
#ifndef CONFIG_ARCH_HAS_ILOG2_U32
|
|
static inline __attribute__((const))
|
|
int __ilog2_u32(u32 n)
|
|
{
|
|
return fls(n) - 1;
|
|
}
|
|
#endif
|
|
|
|
#ifndef CONFIG_ARCH_HAS_ILOG2_U64
|
|
static inline __attribute__((const))
|
|
int __ilog2_u64(u64 n)
|
|
{
|
|
return fls64(n) - 1;
|
|
}
|
|
#endif
|
|
|
|
/**
 * is_power_of_2() - test whether a value is an exact power of two
 * @n: the value to test
 *
 * A power of two has exactly one bit set, so clearing its lowest set bit
 * with n & (n - 1) leaves nothing behind.  Zero has no bit set at all and
 * is explicitly *not* considered a power of two.
 *
 * Return: true if @n is a power of 2, otherwise false.
 */
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
	/* Zero would pass the bit trick below, so reject it up front. */
	if (n == 0)
		return false;

	return (n & (n - 1)) == 0;
}
|
|
|
|
/**
 * __roundup_pow_of_two() - round up to nearest power of two
 * @n: value to round up
 *
 * Runtime helper behind roundup_pow_of_two(): shifts 1UL left by
 * fls_long(@n - 1).  As documented for roundup_pow_of_two(), the result is
 * undefined when @n == 0.
 */
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
	const unsigned int shift = fls_long(n - 1);

	return 1UL << shift;
}
|
|
|
|
/**
 * __rounddown_pow_of_two() - round down to nearest power of two
 * @n: value to round down
 *
 * Runtime helper behind rounddown_pow_of_two(): shifts 1UL left by
 * fls_long(@n) - 1.  As documented for rounddown_pow_of_two(), the result
 * is undefined when @n == 0.
 */
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
	const unsigned int top_bit = fls_long(n) - 1;

	return 1UL << top_bit;
}
|
|
|
|
/**
 * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
 * @n: parameter
 *
 * Use this where sparse expects a true constant expression, e.g. for array
 * indices.
 *
 * For a compile-time constant @n the ladder below tests the bits from 63
 * down to 2 and evaluates to the index of the highest one that is set;
 * values below 2 evaluate to 0 and the remaining case (only bit 1 set as
 * the top bit) evaluates to 1.  When @n is not a compile-time constant the
 * whole expression evaluates to -1.
 */
#define const_ilog2(n)				\
(						\
	__builtin_constant_p(n) ? (		\
		((n) < 2) ? 0 :			\
		((n) & (1ULL << 63)) ? 63 :	\
		((n) & (1ULL << 62)) ? 62 :	\
		((n) & (1ULL << 61)) ? 61 :	\
		((n) & (1ULL << 60)) ? 60 :	\
		((n) & (1ULL << 59)) ? 59 :	\
		((n) & (1ULL << 58)) ? 58 :	\
		((n) & (1ULL << 57)) ? 57 :	\
		((n) & (1ULL << 56)) ? 56 :	\
		((n) & (1ULL << 55)) ? 55 :	\
		((n) & (1ULL << 54)) ? 54 :	\
		((n) & (1ULL << 53)) ? 53 :	\
		((n) & (1ULL << 52)) ? 52 :	\
		((n) & (1ULL << 51)) ? 51 :	\
		((n) & (1ULL << 50)) ? 50 :	\
		((n) & (1ULL << 49)) ? 49 :	\
		((n) & (1ULL << 48)) ? 48 :	\
		((n) & (1ULL << 47)) ? 47 :	\
		((n) & (1ULL << 46)) ? 46 :	\
		((n) & (1ULL << 45)) ? 45 :	\
		((n) & (1ULL << 44)) ? 44 :	\
		((n) & (1ULL << 43)) ? 43 :	\
		((n) & (1ULL << 42)) ? 42 :	\
		((n) & (1ULL << 41)) ? 41 :	\
		((n) & (1ULL << 40)) ? 40 :	\
		((n) & (1ULL << 39)) ? 39 :	\
		((n) & (1ULL << 38)) ? 38 :	\
		((n) & (1ULL << 37)) ? 37 :	\
		((n) & (1ULL << 36)) ? 36 :	\
		((n) & (1ULL << 35)) ? 35 :	\
		((n) & (1ULL << 34)) ? 34 :	\
		((n) & (1ULL << 33)) ? 33 :	\
		((n) & (1ULL << 32)) ? 32 :	\
		((n) & (1ULL << 31)) ? 31 :	\
		((n) & (1ULL << 30)) ? 30 :	\
		((n) & (1ULL << 29)) ? 29 :	\
		((n) & (1ULL << 28)) ? 28 :	\
		((n) & (1ULL << 27)) ? 27 :	\
		((n) & (1ULL << 26)) ? 26 :	\
		((n) & (1ULL << 25)) ? 25 :	\
		((n) & (1ULL << 24)) ? 24 :	\
		((n) & (1ULL << 23)) ? 23 :	\
		((n) & (1ULL << 22)) ? 22 :	\
		((n) & (1ULL << 21)) ? 21 :	\
		((n) & (1ULL << 20)) ? 20 :	\
		((n) & (1ULL << 19)) ? 19 :	\
		((n) & (1ULL << 18)) ? 18 :	\
		((n) & (1ULL << 17)) ? 17 :	\
		((n) & (1ULL << 16)) ? 16 :	\
		((n) & (1ULL << 15)) ? 15 :	\
		((n) & (1ULL << 14)) ? 14 :	\
		((n) & (1ULL << 13)) ? 13 :	\
		((n) & (1ULL << 12)) ? 12 :	\
		((n) & (1ULL << 11)) ? 11 :	\
		((n) & (1ULL << 10)) ? 10 :	\
		((n) & (1ULL <<  9)) ?  9 :	\
		((n) & (1ULL <<  8)) ?  8 :	\
		((n) & (1ULL <<  7)) ?  7 :	\
		((n) & (1ULL <<  6)) ?  6 :	\
		((n) & (1ULL <<  5)) ?  5 :	\
		((n) & (1ULL <<  4)) ?  4 :	\
		((n) & (1ULL <<  3)) ?  3 :	\
		((n) & (1ULL <<  2)) ?  2 :	\
		1) :				\
	-1)
|
|
|
|
/**
 * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
 * @n: parameter
 *
 * constant-capable log of base 2 calculation
 * - for a compile-time constant @n this expands to const_ilog2(), so it can
 *   be used to initialise global variables from constant data (hence the
 *   massive ternary operator construction in const_ilog2())
 * - otherwise the appropriately-sized runtime helper is selected from
 *   sizeof(n): __ilog2_u32() for arguments of up to 4 bytes, __ilog2_u64()
 *   for anything wider
 */
#define ilog2(n)				\
(						\
	__builtin_constant_p(n) ?		\
		const_ilog2(n) :		\
		((sizeof(n) <= 4) ?		\
			__ilog2_u32(n) :	\
			__ilog2_u64(n))		\
)
|
|
|
|
/**
 * roundup_pow_of_two - round the given value up to nearest power of two
 * @n: parameter
 *
 * round the given value up to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 *
 * For a compile-time constant @n the result is computed with ilog2() so it
 * stays a constant expression; @n == 1 is special-cased because it would
 * otherwise ask for ilog2(0).  Non-constant arguments are handed to
 * __roundup_pow_of_two().
 *
 * Every use of @n is parenthesised so that an expression argument such as
 * "a | b" is compared and decremented as a whole rather than being
 * re-associated by operator precedence.
 */
#define roundup_pow_of_two(n)			\
(						\
	__builtin_constant_p(n) ? (		\
		((n) == 1) ? 1 :		\
		(1UL << (ilog2((n) - 1) + 1))	\
				   ) :		\
	__roundup_pow_of_two(n)			\
)
|
|
|
|
/**
 * rounddown_pow_of_two - round the given value down to nearest power of two
 * @n: parameter
 *
 * round the given value down to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 *
 * A compile-time constant @n is resolved through ilog2() so the result
 * stays a constant expression; anything else is handed to
 * __rounddown_pow_of_two().
 */
#define rounddown_pow_of_two(n)			\
(						\
	__builtin_constant_p(n) ?		\
		(1UL << ilog2(n)) :		\
		__rounddown_pow_of_two(n)	\
)
|
|
|
|
static inline __attribute_const__
|
|
int __order_base_2(unsigned long n)
|
|
{
|
|
return n > 1 ? ilog2(n - 1) + 1 : 0;
|
|
}
|
|
|
|
/**
 * order_base_2 - calculate the (rounded up) base 2 order of the argument
 * @n: parameter
 *
 * A compile-time constant @n is evaluated in place (0 for the values 0 and
 * 1, ilog2(n - 1) + 1 otherwise); any other argument is handed to
 * __order_base_2().
 *
 * The first few values calculated by this routine:
 *  ob2(0) = 0
 *  ob2(1) = 0
 *  ob2(2) = 1
 *  ob2(3) = 2
 *  ob2(4) = 2
 *  ob2(5) = 3
 *  ... and so on.
 */
#define order_base_2(n)					\
(							\
	__builtin_constant_p(n) ?			\
		(((n) == 0 || (n) == 1) ?		\
			0 :				\
			(ilog2((n) - 1) + 1)) :		\
		__order_base_2(n)			\
)
|
|
|
|
/*
 * Runtime helper behind bits_per(): number of bits needed to hold @n,
 * with a minimum of one bit for the values 0 and 1.
 */
static inline __attribute__((const))
int __bits_per(unsigned long n)
{
	int order;

	if (n < 2)
		return 1;

	order = order_base_2(n);

	/*
	 * order_base_2() rounds up, so it is already the bit count for any
	 * value that is not a power of two; an exact power of two needs one
	 * bit more than its order.
	 */
	return is_power_of_2(n) ? order + 1 : order;
}
|
|
|
|
/**
 * bits_per - calculate the number of bits required for the argument
 * @n: parameter
 *
 * This is constant-capable and can be used for compile time
 * initializations, e.g bitfields.
 *
 * A compile-time constant @n is evaluated in place (1 for the values 0 and
 * 1, ilog2(n) + 1 otherwise); any other argument is handed to
 * __bits_per().
 *
 * The first few values calculated by this routine:
 *  bits_per(0) = 1
 *  bits_per(1) = 1
 *  bits_per(2) = 2
 *  bits_per(3) = 2
 *  bits_per(4) = 3
 *  ... and so on.
 */
#define bits_per(n)				\
(						\
	__builtin_constant_p(n) ?		\
		(((n) == 0 || (n) == 1) ?	\
			1 :			\
			(ilog2(n) + 1)) :	\
		__bits_per(n)			\
)
|
|
#endif /* _LINUX_LOG2_H */
|