mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
204c881e96
The example contains two values for the capacity currently, 446 in text and 578 in code. The numbers are all correct but can confuse some of the readers. This patch tries to explain how the numbers are calculated to avoid same confusion going forward. Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rob Herring <robh@kernel.org>
239 lines
6.5 KiB
Plaintext
239 lines
6.5 KiB
Plaintext
==========================================
|
|
ARM CPUs capacity bindings
|
|
==========================================
|
|
|
|
==========================================
|
|
1 - Introduction
|
|
==========================================
|
|
|
|
ARM systems may be configured to have cpus with different power/performance
|
|
characteristics within the same chip. In this case, additional information has
|
|
to be made available to the kernel for it to be aware of such differences and
|
|
take decisions accordingly.
|
|
|
|
==========================================
|
|
2 - CPU capacity definition
|
|
==========================================
|
|
|
|
CPU capacity is a number that provides the scheduler information about CPUs
|
|
heterogeneity. Such heterogeneity can come from micro-architectural differences
|
|
(e.g., ARM big.LITTLE systems) or maximum frequency at which CPUs can run
|
|
(e.g., SMP systems with multiple frequency domains). Heterogeneity in this
|
|
context is about differing performance characteristics; this binding tries to
|
|
capture a first-order approximation of the relative performance of CPUs.
|
|
|
|
CPU capacities are obtained by running a suitable benchmark. This binding makes
|
|
no guarantees on the validity or suitability of any particular benchmark, the
|
|
final capacity should, however, be:
|
|
|
|
* A "single-threaded" or CPU affine benchmark
|
|
* Divided by the running frequency of the CPU executing the benchmark
|
|
* Not subject to dynamic frequency scaling of the CPU
|
|
|
|
For the time being we however advise usage of the Dhrystone benchmark. The
|
|
above thus becomes:
|
|
|
|
CPU capacities are obtained by running the Dhrystone benchmark on each CPU at
|
|
max frequency (with caches enabled). The obtained DMIPS score is then divided
|
|
by the frequency (in MHz) at which the benchmark has been run, so that
|
|
DMIPS/MHz are obtained. Such values are then normalized w.r.t. the highest
|
|
score obtained in the system.
|
|
|
|
==========================================
|
|
3 - capacity-dmips-mhz
|
|
==========================================
|
|
|
|
capacity-dmips-mhz is an optional cpu node [1] property: u32 value
|
|
representing CPU capacity expressed in normalized DMIPS/MHz. At boot time, the
|
|
maximum frequency available to the cpu is then used to calculate the capacity
|
|
value internally used by the kernel.
|
|
|
|
capacity-dmips-mhz property is all-or-nothing: if it is specified for a cpu
|
|
node, it has to be specified for every other cpu node, or the system will
|
|
fall back to the default capacity value for every CPU. If cpufreq is not
|
|
available, final capacities are calculated by directly using capacity-dmips-
|
|
mhz values (normalized w.r.t. the highest value found while parsing the DT).
|
|
|
|
===========================================
|
|
4 - Examples
|
|
===========================================
|
|
|
|
Example 1 (ARM 64-bit, 6-cpu system, two clusters):
|
|
The capacity-dmips-mhz or DMIPS/MHz values (scaled to 1024)
|
|
are 1024 and 578 for cluster0 and cluster1. Further normalization
|
|
is done by the operating system based on cluster0@max-freq=1100 and
|
|
cluster1@max-freq=850, final capacities are 1024 for cluster0 and
|
|
446 for cluster1 (578*850/1100).
|
|
|
|
cpus {
|
|
#address-cells = <2>;
|
|
#size-cells = <0>;
|
|
|
|
cpu-map {
|
|
cluster0 {
|
|
core0 {
|
|
cpu = <&A57_0>;
|
|
};
|
|
core1 {
|
|
cpu = <&A57_1>;
|
|
};
|
|
};
|
|
|
|
cluster1 {
|
|
core0 {
|
|
cpu = <&A53_0>;
|
|
};
|
|
core1 {
|
|
cpu = <&A53_1>;
|
|
};
|
|
core2 {
|
|
cpu = <&A53_2>;
|
|
};
|
|
core3 {
|
|
cpu = <&A53_3>;
|
|
};
|
|
};
|
|
};
|
|
|
|
idle-states {
|
|
entry-method = "psci";
|
|
|
|
CPU_SLEEP_0: cpu-sleep-0 {
|
|
compatible = "arm,idle-state";
|
|
arm,psci-suspend-param = <0x0010000>;
|
|
local-timer-stop;
|
|
entry-latency-us = <100>;
|
|
exit-latency-us = <250>;
|
|
min-residency-us = <150>;
|
|
};
|
|
|
|
CLUSTER_SLEEP_0: cluster-sleep-0 {
|
|
compatible = "arm,idle-state";
|
|
arm,psci-suspend-param = <0x1010000>;
|
|
local-timer-stop;
|
|
entry-latency-us = <800>;
|
|
exit-latency-us = <700>;
|
|
min-residency-us = <2500>;
|
|
};
|
|
};
|
|
|
|
A57_0: cpu@0 {
|
|
compatible = "arm,cortex-a57","arm,armv8";
|
|
reg = <0x0 0x0>;
|
|
device_type = "cpu";
|
|
enable-method = "psci";
|
|
next-level-cache = <&A57_L2>;
|
|
clocks = <&scpi_dvfs 0>;
|
|
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
|
|
capacity-dmips-mhz = <1024>;
|
|
};
|
|
|
|
A57_1: cpu@1 {
|
|
compatible = "arm,cortex-a57","arm,armv8";
|
|
reg = <0x0 0x1>;
|
|
device_type = "cpu";
|
|
enable-method = "psci";
|
|
next-level-cache = <&A57_L2>;
|
|
clocks = <&scpi_dvfs 0>;
|
|
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
|
|
capacity-dmips-mhz = <1024>;
|
|
};
|
|
|
|
A53_0: cpu@100 {
|
|
compatible = "arm,cortex-a53","arm,armv8";
|
|
reg = <0x0 0x100>;
|
|
device_type = "cpu";
|
|
enable-method = "psci";
|
|
next-level-cache = <&A53_L2>;
|
|
clocks = <&scpi_dvfs 1>;
|
|
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
|
|
capacity-dmips-mhz = <578>;
|
|
};
|
|
|
|
A53_1: cpu@101 {
|
|
compatible = "arm,cortex-a53","arm,armv8";
|
|
reg = <0x0 0x101>;
|
|
device_type = "cpu";
|
|
enable-method = "psci";
|
|
next-level-cache = <&A53_L2>;
|
|
clocks = <&scpi_dvfs 1>;
|
|
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
|
|
capacity-dmips-mhz = <578>;
|
|
};
|
|
|
|
A53_2: cpu@102 {
|
|
compatible = "arm,cortex-a53","arm,armv8";
|
|
reg = <0x0 0x102>;
|
|
device_type = "cpu";
|
|
enable-method = "psci";
|
|
next-level-cache = <&A53_L2>;
|
|
clocks = <&scpi_dvfs 1>;
|
|
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
|
|
capacity-dmips-mhz = <578>;
|
|
};
|
|
|
|
A53_3: cpu@103 {
|
|
compatible = "arm,cortex-a53","arm,armv8";
|
|
reg = <0x0 0x103>;
|
|
device_type = "cpu";
|
|
enable-method = "psci";
|
|
next-level-cache = <&A53_L2>;
|
|
clocks = <&scpi_dvfs 1>;
|
|
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
|
|
capacity-dmips-mhz = <578>;
|
|
};
|
|
|
|
A57_L2: l2-cache0 {
|
|
compatible = "cache";
|
|
};
|
|
|
|
A53_L2: l2-cache1 {
|
|
compatible = "cache";
|
|
};
|
|
};
|
|
|
|
Example 2 (ARM 32-bit, 4-cpu system, two clusters,
|
|
cpus 0,1@1GHz, cpus 2,3@500MHz):
|
|
capacity-dmips-mhz values are scaled w.r.t. 2 (cpu@0 and cpu@1); this means that
|
|
cpu@0 and cpu@1 are twice as fast as cpu@2 and cpu@3 (at the same frequency)
|
|
|
|
cpus {
|
|
#address-cells = <1>;
|
|
#size-cells = <0>;
|
|
|
|
cpu0: cpu@0 {
|
|
device_type = "cpu";
|
|
compatible = "arm,cortex-a15";
|
|
reg = <0>;
|
|
capacity-dmips-mhz = <2>;
|
|
};
|
|
|
|
cpu1: cpu@1 {
|
|
device_type = "cpu";
|
|
compatible = "arm,cortex-a15";
|
|
reg = <1>;
|
|
capacity-dmips-mhz = <2>;
|
|
};
|
|
|
|
cpu2: cpu@2 {
|
|
device_type = "cpu";
|
|
compatible = "arm,cortex-a15";
|
|
reg = <0x100>;
|
|
capacity-dmips-mhz = <1>;
|
|
};
|
|
|
|
cpu3: cpu@3 {
|
|
device_type = "cpu";
|
|
compatible = "arm,cortex-a15";
|
|
reg = <0x101>;
|
|
capacity-dmips-mhz = <1>;
|
|
};
|
|
};
|
|
|
|
===========================================
|
|
5 - References
|
|
===========================================
|
|
|
|
[1] ARM Linux Kernel documentation - CPUs bindings
|
|
Documentation/devicetree/bindings/arm/cpus.txt
|