2005-09-24 02:08:58 +07:00
|
|
|
#ifndef __ASM_POWERPC_CPUTABLE_H
|
|
|
|
#define __ASM_POWERPC_CPUTABLE_H
|
|
|
|
|
2008-06-24 00:48:21 +07:00
|
|
|
|
|
|
|
#include <asm/asm-compat.h>
|
2008-06-24 08:32:39 +07:00
|
|
|
#include <asm/feature-fixups.h>
|
2012-10-09 15:47:26 +07:00
|
|
|
#include <uapi/asm/cputable.h>
|
2008-06-24 00:48:21 +07:00
|
|
|
|
2005-09-24 02:08:58 +07:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
/* This structure can grow, its real size is used by head.S code
|
|
|
|
* via the mkdefs mechanism.
|
|
|
|
*/
|
|
|
|
struct cpu_spec;
|
|
|
|
|
|
|
|
/*
 * Type of the cpu_setup member of struct cpu_spec: a hook that takes an
 * unsigned long offset and a pointer to a struct cpu_spec, and returns
 * nothing.
 */
typedef void (*cpu_setup_t)(unsigned long offset, struct cpu_spec *spec);
|
2006-08-11 12:07:08 +07:00
|
|
|
/*
 * Type of the cpu_restore member of struct cpu_spec: a hook that takes no
 * arguments and returns nothing.
 */
typedef void (*cpu_restore_t)(void);
|
2005-09-24 02:08:58 +07:00
|
|
|
|
2006-01-09 11:41:31 +07:00
|
|
|
enum powerpc_oprofile_type {
|
2006-01-13 19:35:49 +07:00
|
|
|
PPC_OPROFILE_INVALID = 0,
|
|
|
|
PPC_OPROFILE_RS64 = 1,
|
|
|
|
PPC_OPROFILE_POWER4 = 2,
|
|
|
|
PPC_OPROFILE_G4 = 3,
|
2008-02-05 07:27:55 +07:00
|
|
|
PPC_OPROFILE_FSL_EMB = 4,
|
[POWERPC] cell: Add oprofile support
Add PPU event-based and cycle-based profiling support to Oprofile for Cell.
Oprofile is expected to collect data on all CPUs simultaneously.
However, there is one set of performance counters per node. There are
two hardware threads or virtual CPUs on each node. Hence, OProfile must
multiplex in time the performance counter collection on the two virtual
CPUs.
The multiplexing of the performance counters is done by a virtual
counter routine. Initially, the counters are configured to collect data
on the even CPUs in the system, one CPU per node. In order to capture
the PC for the virtual CPU when the performance counter interrupt occurs
(the specified number of events between samples has occurred), the even
processors are configured to handle the performance counter interrupts
for their node. The virtual counter routine is called via a kernel
timer after the virtual sample time. The routine stops the counters,
saves the current counts, loads the last counts for the other virtual
CPU on the node, sets interrupts to be handled by the other virtual CPU
and restarts the counters, the virtual timer routine is scheduled to run
again. The virtual sample time is kept relatively small to make sure
sampling occurs on both CPUs on the node with a relatively small
granularity. Whenever the counters overflow, the performance counter
interrupt is called to collect the PC for the CPU where data is being
collected.
The oprofile driver relies on a firmware RTAS call to setup the debug bus
to route the desired signals to the performance counter hardware to be
counted. The RTAS call must set the routing registers appropriately in
each of the islands to pass the signals down the debug bus as well as
routing the signals from a particular island onto the bus. There is a
second firmware RTAS call to reset the debug bus to the non pass thru
state when the counters are not in use.
Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-11-21 00:45:16 +07:00
|
|
|
PPC_OPROFILE_CELL = 5,
|
2007-04-18 13:38:21 +07:00
|
|
|
PPC_OPROFILE_PA6T = 6,
|
2006-01-09 11:41:31 +07:00
|
|
|
};
|
|
|
|
|
2007-01-29 10:23:54 +07:00
|
|
|
enum powerpc_pmc_type {
|
|
|
|
PPC_PMC_DEFAULT = 0,
|
|
|
|
PPC_PMC_IBM = 1,
|
|
|
|
PPC_PMC_PA6T = 2,
|
2008-08-18 11:23:51 +07:00
|
|
|
PPC_PMC_G4 = 3,
|
2007-01-29 10:23:54 +07:00
|
|
|
};
|
|
|
|
|
2007-12-21 11:39:21 +07:00
|
|
|
struct pt_regs;
|
|
|
|
|
|
|
|
extern int machine_check_generic(struct pt_regs *regs);
|
|
|
|
extern int machine_check_4xx(struct pt_regs *regs);
|
|
|
|
extern int machine_check_440A(struct pt_regs *regs);
|
2010-04-08 12:38:22 +07:00
|
|
|
extern int machine_check_e500mc(struct pt_regs *regs);
|
2007-12-21 11:39:21 +07:00
|
|
|
extern int machine_check_e500(struct pt_regs *regs);
|
|
|
|
extern int machine_check_e200(struct pt_regs *regs);
|
2010-03-05 10:43:18 +07:00
|
|
|
extern int machine_check_47x(struct pt_regs *regs);
|
2007-12-21 11:39:21 +07:00
|
|
|
|
[POWERPC] Fix performance monitor on machines with logical PVR
Some IBM machines supply a "logical" PVR (processor version register)
value in the device tree in the cpu nodes rather than the real PVR.
This is used for instance to indicate that the processors in a POWER6
partition have been configured by the hypervisor to run in POWER5+
mode rather than POWER6 mode. To cope with this, we call identify_cpu
a second time with the logical PVR value (the first call is with the
real PVR value in the very early setup code).
However, POWER5+ machines can also supply a logical PVR value, and use
the same value (the value that indicates a v2.04 architecture
compliant processor). This causes problems for code that uses the
performance monitor (such as oprofile), because the PMU registers are
different in POWER6 (even in POWER5+ mode) from the real POWER5+.
This change works around this problem by taking out the PMU
information from the cputable entries for the logical PVR values, and
changing identify_cpu so that the second call to it won't overwrite
the PMU information that was established by the first call (the one
with the real PVR), but does update the other fields. Specifically,
if the cputable entry for the logical PVR value has num_pmcs == 0,
none of the PMU-related fields get used.
So that we can create a mixed cputable entry, we now make cur_cpu_spec
point to a single static struct cpu_spec, and copy stuff from
cpu_specs[i] into it. This has the side-effect that we can now make
cpu_specs[] be initdata.
Ultimately it would be good to move the PMU-related fields out to a
separate structure, pointed to by the cputable entries, and change
identify_cpu so that it saves the PMU info pointer, copies the whole
structure, and restores the PMU info pointer, rather than identify_cpu
having to list all the fields that are *not* PMU-related.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2007-10-04 11:18:01 +07:00
|
|
|
/* NOTE WELL: Update identify_cpu() if fields are added or removed! */
|
2005-09-24 02:08:58 +07:00
|
|
|
struct cpu_spec {
|
|
|
|
/* CPU is matched via (PVR & pvr_mask) == pvr_value */
|
|
|
|
unsigned int pvr_mask;
|
|
|
|
unsigned int pvr_value;
|
|
|
|
|
|
|
|
char *cpu_name;
|
|
|
|
unsigned long cpu_features; /* Kernel features */
|
|
|
|
unsigned int cpu_user_features; /* Userland features */
|
2008-12-19 02:13:32 +07:00
|
|
|
unsigned int mmu_features; /* MMU features */
|
2005-09-24 02:08:58 +07:00
|
|
|
|
|
|
|
/* cache line sizes */
|
|
|
|
unsigned int icache_bsize;
|
|
|
|
unsigned int dcache_bsize;
|
|
|
|
|
|
|
|
/* number of performance monitor counters */
|
|
|
|
unsigned int num_pmcs;
|
2007-01-29 10:23:54 +07:00
|
|
|
enum powerpc_pmc_type pmc_type;
|
2005-09-24 02:08:58 +07:00
|
|
|
|
|
|
|
/* this is called to initialize various CPU bits like L1 cache,
|
|
|
|
* BHT, SPD, etc... from head.S before branching to identify_machine
|
|
|
|
*/
|
|
|
|
cpu_setup_t cpu_setup;
|
2006-08-11 12:07:08 +07:00
|
|
|
/* Used to restore cpu setup on secondary processors and at resume */
|
|
|
|
cpu_restore_t cpu_restore;
|
2005-09-24 02:08:58 +07:00
|
|
|
|
|
|
|
/* Used by oprofile userspace to select the right counters */
|
|
|
|
char *oprofile_cpu_type;
|
|
|
|
|
|
|
|
/* Processor specific oprofile operations */
|
2006-01-09 11:41:31 +07:00
|
|
|
enum powerpc_oprofile_type oprofile_type;
|
2006-01-14 06:11:39 +07:00
|
|
|
|
2006-06-08 11:42:34 +07:00
|
|
|
/* Bit locations inside the mmcra change */
|
|
|
|
unsigned long oprofile_mmcra_sihv;
|
|
|
|
unsigned long oprofile_mmcra_sipr;
|
|
|
|
|
|
|
|
/* Bits to clear during an oprofile exception */
|
|
|
|
unsigned long oprofile_mmcra_clear;
|
|
|
|
|
2006-01-14 06:11:39 +07:00
|
|
|
/* Name of processor class, for the ELF AT_PLATFORM entry */
|
|
|
|
char *platform;
|
2007-12-21 11:39:21 +07:00
|
|
|
|
|
|
|
/* Processor specific machine check handling. Return negative
|
|
|
|
* if the error is fatal, 1 if it was fully recovered and 0 to
|
|
|
|
* pass up (not CPU originated) */
|
|
|
|
int (*machine_check)(struct pt_regs *regs);
|
2005-09-24 02:08:58 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
extern struct cpu_spec *cur_cpu_spec;
|
|
|
|
|
2006-10-24 13:42:40 +07:00
|
|
|
extern unsigned int __start___ftr_fixup, __stop___ftr_fixup;
|
|
|
|
|
2006-11-10 16:38:53 +07:00
|
|
|
extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
|
2006-10-20 08:47:18 +07:00
|
|
|
extern void do_feature_fixups(unsigned long value, void *fixup_start,
|
|
|
|
void *fixup_end);
|
2005-10-06 09:06:20 +07:00
|
|
|
|
2008-07-16 06:58:51 +07:00
|
|
|
extern const char *powerpc_base_platform;
|
|
|
|
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
|
|
/* CPU kernel features */
|
|
|
|
|
|
|
|
/* Retain the 32b definitions; all use the bottom half of the word */
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000000000000001)
|
2005-09-24 02:08:58 +07:00
|
|
|
#define CPU_FTR_L2CR ASM_CONST(0x0000000000000002)
|
|
|
|
#define CPU_FTR_SPEC7450 ASM_CONST(0x0000000000000004)
|
|
|
|
#define CPU_FTR_ALTIVEC ASM_CONST(0x0000000000000008)
|
|
|
|
#define CPU_FTR_TAU ASM_CONST(0x0000000000000010)
|
|
|
|
#define CPU_FTR_CAN_DOZE ASM_CONST(0x0000000000000020)
|
|
|
|
#define CPU_FTR_USE_TB ASM_CONST(0x0000000000000040)
|
2008-06-19 21:40:31 +07:00
|
|
|
#define CPU_FTR_L2CSR ASM_CONST(0x0000000000000080)
|
2005-09-24 02:08:58 +07:00
|
|
|
#define CPU_FTR_601 ASM_CONST(0x0000000000000100)
|
2009-02-12 20:54:53 +07:00
|
|
|
#define CPU_FTR_DBELL ASM_CONST(0x0000000000000200)
|
2005-09-24 02:08:58 +07:00
|
|
|
#define CPU_FTR_CAN_NAP ASM_CONST(0x0000000000000400)
|
|
|
|
#define CPU_FTR_L3CR ASM_CONST(0x0000000000000800)
|
|
|
|
#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x0000000000001000)
|
|
|
|
#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x0000000000002000)
|
|
|
|
#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x0000000000004000)
|
|
|
|
#define CPU_FTR_NO_DPM ASM_CONST(0x0000000000008000)
|
2011-01-26 13:17:58 +07:00
|
|
|
#define CPU_FTR_476_DD2 ASM_CONST(0x0000000000010000)
|
2005-09-24 02:08:58 +07:00
|
|
|
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000)
|
|
|
|
#define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000)
|
2011-04-06 12:18:48 +07:00
|
|
|
#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x0000000000080000)
|
2006-03-21 16:45:58 +07:00
|
|
|
#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000)
|
2006-06-07 13:14:40 +07:00
|
|
|
#define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000)
|
|
|
|
#define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000)
|
2006-12-08 15:43:30 +07:00
|
|
|
#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x0000000000800000)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x0000000001000000)
|
2007-09-13 13:44:20 +07:00
|
|
|
#define CPU_FTR_SPE ASM_CONST(0x0000000002000000)
|
2007-11-10 05:17:49 +07:00
|
|
|
#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x0000000004000000)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000)
|
2008-12-12 13:33:25 +07:00
|
|
|
#define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000)
|
2008-12-19 02:13:22 +07:00
|
|
|
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000)
|
2011-12-20 22:34:47 +07:00
|
|
|
#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000)
|
2005-09-24 02:08:58 +07:00
|
|
|
|
2006-06-28 10:50:39 +07:00
|
|
|
/*
|
|
|
|
* Add the 64-bit processor unique features in the top half of the word;
|
|
|
|
* on 32-bit, make the names available but defined to be 0.
|
|
|
|
*/
|
2005-09-24 02:08:58 +07:00
|
|
|
#ifdef __powerpc64__
|
2006-06-28 10:50:39 +07:00
|
|
|
#define LONG_ASM_CONST(x) ASM_CONST(x)
|
2005-09-24 02:08:58 +07:00
|
|
|
#else
|
2006-06-28 10:50:39 +07:00
|
|
|
#define LONG_ASM_CONST(x) 0
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif
|
|
|
|
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:26:11 +07:00
|
|
|
#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000200000000)
|
|
|
|
#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000400000000)
|
|
|
|
#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000800000000)
|
2011-05-02 02:48:20 +07:00
|
|
|
#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000001000000000)
|
2006-06-28 10:50:39 +07:00
|
|
|
#define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000)
|
|
|
|
#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000)
|
|
|
|
#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000)
|
|
|
|
#define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000)
|
|
|
|
#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000)
|
|
|
|
#define CPU_FTR_PURR LONG_ASM_CONST(0x0000400000000000)
|
2006-10-20 11:37:05 +07:00
|
|
|
#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000800000000000)
|
2006-11-10 16:38:53 +07:00
|
|
|
#define CPU_FTR_SPURR LONG_ASM_CONST(0x0001000000000000)
|
2006-12-08 13:46:58 +07:00
|
|
|
#define CPU_FTR_DSCR LONG_ASM_CONST(0x0002000000000000)
|
2008-06-25 11:07:18 +07:00
|
|
|
#define CPU_FTR_VSX LONG_ASM_CONST(0x0010000000000000)
|
2008-07-07 21:28:53 +07:00
|
|
|
#define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000)
|
2008-08-22 11:36:19 +07:00
|
|
|
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000)
|
2008-10-27 07:43:02 +07:00
|
|
|
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
|
powerpc: Enable asymmetric SMT scheduling on POWER7
The POWER7 core has dynamic SMT mode switching which is controlled by
the hypervisor. There are 3 SMT modes:
SMT1 uses thread 0
SMT2 uses threads 0 & 1
SMT4 uses threads 0, 1, 2 & 3
When in any particular SMT mode, all threads have the same performance
as each other (ie. at any moment in time, all threads perform the same).
The SMT mode switching works such that when linux has threads 2 & 3 idle
and 0 & 1 active, it will cede (H_CEDE hypercall) threads 2 and 3 in the
idle loop and the hypervisor will automatically switch to SMT2 for that
core (independent of other cores). The opposite is not true, so if
threads 0 & 1 are idle and 2 & 3 are active, we will stay in SMT4 mode.
Similarly if thread 0 is active and threads 1, 2 & 3 are idle, we'll go
into SMT1 mode.
If we can get the core into a lower SMT mode (SMT1 is best), the threads
will perform better (since they share less core resources). Hence when
we have idle threads, we want them to be the higher ones.
This adds a feature bit for asymmetric packing to powerpc and then
enables it on POWER7.
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org
LKML-Reference: <20100608045702.31FB5CC8C7@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-08 11:57:02 +07:00
|
|
|
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)
|
2010-08-11 08:40:27 +07:00
|
|
|
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000)
|
2010-08-12 23:28:09 +07:00
|
|
|
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000)
|
|
|
|
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000)
|
2011-05-03 03:43:04 +07:00
|
|
|
#define CPU_FTR_ICSWX LONG_ASM_CONST(0x1000000000000000)
|
powerpc: POWER7 optimised copy_to_user/copy_from_user using VMX
Implement a POWER7 optimised copy_to_user/copy_from_user using VMX.
For large aligned copies this new loop is over 10% faster, and for
large unaligned copies it is over 200% faster.
If we take a fault we fall back to the old version, this keeps
things relatively simple and easy to verify.
On POWER7 unaligned stores rarely slow down - they only flush when
a store crosses a 4KB page boundary. Furthermore this flush is
handled completely in hardware and should be 20-30 cycles.
Unaligned loads on the other hand flush much more often - whenever
crossing a 128 byte cache line, or a 32 byte sector if either sector
is an L1 miss.
Considering this information we really want to get the loads aligned
and not worry about the alignment of the stores. Microbenchmarks
confirm that this approach is much faster than the current unaligned
copy loop that uses shifts and rotates to ensure both loads and
stores are aligned.
We also want to try and do the stores in cacheline aligned, cacheline
sized chunks. If the store queue is unable to merge an entire
cacheline of stores then the L2 cache will have to do a
read/modify/write. Even worse, we will serialise this with the stores
in the next iteration of the copy loop since both iterations hit
the same cacheline.
Based on this, the new loop does the following things:
1 - 127 bytes
Get the source 8 byte aligned and use 8 byte loads and stores. Pretty
boring and similar to how the current loop works.
128 - 4095 bytes
Get the source 8 byte aligned and use 8 byte loads and stores,
1 cacheline at a time. We aren't doing the stores in cacheline
aligned chunks so we will potentially serialise once per cacheline.
Even so it is much better than the loop we have today.
4096 - bytes
If both source and destination have the same alignment get them both
16 byte aligned, then get the destination cacheline aligned. Do
cacheline sized loads and stores using VMX.
If source and destination do not have the same alignment, we get the
destination cacheline aligned, and use permute to do aligned loads.
In both cases the VMX loop should be optimal - we always do aligned
loads and stores and are always doing stores in cacheline aligned,
cacheline sized chunks.
To be able to use VMX we must be careful about interrupts and
sleeping. We don't use the VMX loop when in an interrupt (which should
be rare anyway) and we wrap the VMX loop in disable/enable_pagefault
and fall back to the existing copy_tofrom_user loop if we do need to
sleep.
The VMX breakpoint of 4096 bytes was chosen using this microbenchmark:
http://ozlabs.org/~anton/junkcode/copy_to_user.c
Since we are using VMX and there is a cost to saving and restoring
the user VMX state there are two broad cases we need to benchmark:
- Best case - userspace never uses VMX
- Worst case - userspace always uses VMX
In reality a userspace process will sit somewhere between these two
extremes. Since we need to test both aligned and unaligned copies we
end up with 4 combinations. The point at which the VMX loop begins to
win is:
0% VMX
aligned 2048 bytes
unaligned 2048 bytes
100% VMX
aligned 16384 bytes
unaligned 8192 bytes
Considering this is a microbenchmark, the data is hot in cache and
the VMX loop has better store queue merging properties we set the
breakpoint to 4096 bytes, a little below the unaligned breakpoints.
Some future optimisations we can look at:
- Looking at the perf data, a significant part of the cost when a
task is always using VMX is the extra exception we take to restore
the VMX state. As such we should do something similar to the x86
optimisation that restores FPU state for heavy users. ie:
/*
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*/
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
and
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
* saving becomes unlazy to save the trap. This is an unsigned char
* so that after 256 times the counter wraps and the behavior turns
* lazy again; this to deal with bursty apps that only use FPU for
* a short time
*/
- We could create a paca bit to mirror the VMX enabled MSR bit and check
that first, avoiding multiple calls to enable_kernel_altivec.
That should help with iovec based system calls like readv.
- We could have two VMX breakpoints, one for when we know the user VMX
state is loaded into the registers and one when it isn't. This could
be a second bit in the paca so we can calculate the break points quickly.
- One suggestion from Ben was to save and restore the VSX registers
we use inline instead of using enable_kernel_altivec.
[BenH: Fixed a problem with preempt and fixed build without CONFIG_ALTIVEC]
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-12-08 03:11:45 +07:00
|
|
|
#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x2000000000000000)
|
2006-06-28 10:50:39 +07:00
|
|
|
|
2005-09-24 02:08:58 +07:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
2011-04-07 02:48:50 +07:00
|
|
|
#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
|
|
|
|
|
|
|
|
#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \
|
|
|
|
MMU_FTR_16M_PAGE)
|
2005-09-24 02:08:58 +07:00
|
|
|
|
|
|
|
/* We only set the altivec features if the kernel was compiled with altivec
|
|
|
|
* support
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
#define CPU_FTR_ALTIVEC_COMP CPU_FTR_ALTIVEC
|
|
|
|
#define PPC_FEATURE_HAS_ALTIVEC_COMP PPC_FEATURE_HAS_ALTIVEC
|
|
|
|
#else
|
|
|
|
#define CPU_FTR_ALTIVEC_COMP 0
|
|
|
|
#define PPC_FEATURE_HAS_ALTIVEC_COMP 0
|
|
|
|
#endif
|
|
|
|
|
2008-06-25 11:07:18 +07:00
|
|
|
/* We only set the VSX features if the kernel was compiled with VSX
|
|
|
|
* support
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
#define CPU_FTR_VSX_COMP CPU_FTR_VSX
|
|
|
|
#define PPC_FEATURE_HAS_VSX_COMP PPC_FEATURE_HAS_VSX
|
|
|
|
#else
|
|
|
|
#define CPU_FTR_VSX_COMP 0
|
|
|
|
#define PPC_FEATURE_HAS_VSX_COMP 0
|
|
|
|
#endif
|
|
|
|
|
2007-09-13 13:44:20 +07:00
|
|
|
/* We only set the spe features if the kernel was compiled with spe
|
|
|
|
* support
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_SPE
|
|
|
|
#define CPU_FTR_SPE_COMP CPU_FTR_SPE
|
|
|
|
#define PPC_FEATURE_HAS_SPE_COMP PPC_FEATURE_HAS_SPE
|
|
|
|
#define PPC_FEATURE_HAS_EFP_SINGLE_COMP PPC_FEATURE_HAS_EFP_SINGLE
|
|
|
|
#define PPC_FEATURE_HAS_EFP_DOUBLE_COMP PPC_FEATURE_HAS_EFP_DOUBLE
|
|
|
|
#else
|
|
|
|
#define CPU_FTR_SPE_COMP 0
|
|
|
|
#define PPC_FEATURE_HAS_SPE_COMP 0
|
|
|
|
#define PPC_FEATURE_HAS_EFP_SINGLE_COMP 0
|
|
|
|
#define PPC_FEATURE_HAS_EFP_DOUBLE_COMP 0
|
|
|
|
#endif
|
|
|
|
|
2007-09-15 03:32:14 +07:00
|
|
|
/* We need to mark all pages as being coherent if we're SMP or we have a
|
|
|
|
* 74[45]x and an MPC107 host bridge. Also 83xx and PowerQUICC II
|
|
|
|
* require it for PCI "streaming/prefetch" to work properly.
|
2009-03-17 22:17:50 +07:00
|
|
|
* This is also required by 52xx family.
|
2005-09-24 02:08:58 +07:00
|
|
|
*/
|
2006-02-22 22:46:02 +07:00
|
|
|
#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \
|
2009-03-17 22:17:50 +07:00
|
|
|
|| defined(CONFIG_PPC_83xx) || defined(CONFIG_8260) \
|
|
|
|
|| defined(CONFIG_PPC_MPC52xx)
|
2005-09-24 02:08:58 +07:00
|
|
|
#define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT
|
|
|
|
#else
|
|
|
|
#define CPU_FTR_COMMON 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* The powersave features NAP & DOZE seem to confuse BDI when
|
|
|
|
debugging. So if a BDI is used, disable these
|
|
|
|
*/
|
|
|
|
#ifndef CONFIG_BDI_SWITCH
|
|
|
|
#define CPU_FTR_MAYBE_CAN_DOZE CPU_FTR_CAN_DOZE
|
|
|
|
#define CPU_FTR_MAYBE_CAN_NAP CPU_FTR_CAN_NAP
|
|
|
|
#else
|
|
|
|
#define CPU_FTR_MAYBE_CAN_DOZE 0
|
|
|
|
#define CPU_FTR_MAYBE_CAN_NAP 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * CLASSIC_PPC is 1 when none of CONFIG_8xx, CONFIG_4xx, CONFIG_POWER3,
 * CONFIG_POWER4 or CONFIG_BOOKE is defined, and 0 otherwise.
 *
 * The condition is evaluated here with #if instead of being stored in a
 * macro that expands to defined(...): producing the "defined" operator
 * through macro replacement inside a #if expression is undefined behaviour
 * (C11 6.10.1p4) and is reported by -Wexpansion-to-defined.  A plain 0/1
 * value gives the same result in "#if CLASSIC_PPC" and is additionally
 * usable in ordinary C expressions.
 */
#if !defined(CONFIG_8xx) && !defined(CONFIG_4xx) && \
    !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \
    !defined(CONFIG_BOOKE)
#define CLASSIC_PPC 1
#else
#define CLASSIC_PPC 0
#endif
|
|
|
|
|
2008-12-19 02:13:32 +07:00
|
|
|
#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
|
|
|
|
#define CPU_FTRS_603 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
|
2006-06-07 13:14:40 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
#define CPU_FTRS_604 (CPU_FTR_COMMON | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_740_NOTAU: CPU_FTR_COMMON plus the doze/nap "maybe" bits,
 * CPU_FTR_USE_TB, CPU_FTR_L2CR and CPU_FTR_PPC_LE.  Unlike CPU_FTRS_740,
 * CPU_FTR_TAU is not included.
 */
#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_740: the CPU_FTRS_740_NOTAU bit set with CPU_FTR_TAU added
 * (CPU_FTR_COMMON, doze/nap "maybe" bits, USE_TB, L2CR, TAU, PPC_LE).
 */
#define CPU_FTRS_740 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
|
2006-06-07 13:14:40 +07:00
|
|
|
CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_750: ORs the same feature bits as CPU_FTRS_740 (CPU_FTR_COMMON,
 * doze/nap "maybe" bits, USE_TB, L2CR, TAU, PPC_LE).  It is the base that
 * the CPU_FTRS_750CL/750FX* definitions build on.
 */
#define CPU_FTRS_750 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
|
2006-06-07 13:14:40 +07:00
|
|
|
CPU_FTR_PPC_LE)
|
2008-12-19 02:13:32 +07:00
|
|
|
/* CPU_FTRS_750CL reuses the CPU_FTRS_750 mask with no additions. */
#define CPU_FTRS_750CL CPU_FTRS_750
|
2007-07-02 23:06:53 +07:00
|
|
|
/*
 * CPU_FTRS_750FX1: the CPU_FTRS_750 mask extended with
 * CPU_FTR_DUAL_PLL_750FX and CPU_FTR_NO_DPM.
 */
#define CPU_FTRS_750FX1 \
	(CPU_FTR_NO_DPM | CPU_FTR_DUAL_PLL_750FX | CPU_FTRS_750)
|
|
|
|
/*
 * CPU_FTRS_750FX2: the CPU_FTRS_750 mask plus CPU_FTR_NO_DPM
 * (no CPU_FTR_DUAL_PLL_750FX, unlike CPU_FTRS_750FX1).
 */
#define CPU_FTRS_750FX2 \
	(CPU_FTR_NO_DPM | CPU_FTRS_750)
|
2008-12-19 02:13:32 +07:00
|
|
|
/* CPU_FTRS_750FX: the CPU_FTRS_750 mask plus CPU_FTR_DUAL_PLL_750FX. */
#define CPU_FTRS_750FX \
	(CPU_FTR_DUAL_PLL_750FX | CPU_FTRS_750)
|
2007-07-02 23:06:53 +07:00
|
|
|
/* CPU_FTRS_750GX reuses the CPU_FTRS_750FX mask with no additions. */
#define CPU_FTRS_750GX CPU_FTRS_750FX
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7400_NOTAU: the CPU_FTRS_740_NOTAU bit set with
 * CPU_FTR_ALTIVEC_COMP added.  Unlike CPU_FTRS_7400, CPU_FTR_TAU is not
 * included.
 */
#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_ALTIVEC_COMP | \
|
2006-06-07 13:14:40 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7400: the CPU_FTRS_7400_NOTAU bit set with CPU_FTR_TAU added
 * (CPU_FTR_COMMON, doze/nap "maybe" bits, USE_TB, L2CR, TAU, ALTIVEC_COMP,
 * PPC_LE).
 */
#define CPU_FTRS_7400 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
|
2006-06-07 13:14:40 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7450_20: CPU_FTR_COMMON plus USE_TB, L2CR, ALTIVEC_COMP, L3CR,
 * SPEC7450, NEED_COHERENT, PPC_LE and NEED_PAIRED_STWCX.  None of the
 * nap-related bits (CPU_FTR_MAYBE_CAN_NAP, CPU_FTR_NAP_DISABLE_L2_PR,
 * CPU_FTR_L3_DISABLE_NAP) are set here.
 */
#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7450_21: the CPU_FTRS_7450_20 bit set with the nap-related bits
 * CPU_FTR_MAYBE_CAN_NAP, CPU_FTR_NAP_DISABLE_L2_PR and
 * CPU_FTR_L3_DISABLE_NAP added.
 */
#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_USE_TB | \
|
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7450_23: the CPU_FTRS_7450_21 bit set without
 * CPU_FTR_L3_DISABLE_NAP (CPU_FTR_MAYBE_CAN_NAP and
 * CPU_FTR_NAP_DISABLE_L2_PR remain set).
 */
#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
|
2006-06-07 13:14:40 +07:00
|
|
|
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7455_1: CPU_FTR_COMMON plus USE_TB, NEED_PAIRED_STWCX, L2CR,
 * ALTIVEC_COMP, L3CR, SPEC7450, NEED_COHERENT and PPC_LE -- the same bits
 * that CPU_FTRS_7450_20 ORs together; no nap-related bits are set.
 */
#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7455_20: the CPU_FTRS_7455_1 bit set with the nap-related bits
 * CPU_FTR_MAYBE_CAN_NAP, CPU_FTR_NAP_DISABLE_L2_PR and
 * CPU_FTR_L3_DISABLE_NAP added (the same bits CPU_FTRS_7450_21 ORs
 * together).
 */
#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7455: the CPU_FTRS_7455_20 bit set without
 * CPU_FTR_L3_DISABLE_NAP (CPU_FTR_COMMON, USE_TB, MAYBE_CAN_NAP, L2CR,
 * ALTIVEC_COMP, L3CR, SPEC7450, NAP_DISABLE_L2_PR, NEED_COHERENT, PPC_LE,
 * NEED_PAIRED_STWCX).
 */
#define CPU_FTRS_7455 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_USE_TB | \
|
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7447_10: the CPU_FTRS_7447 bit set with CPU_FTR_NO_BTIC added.
 */
#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_USE_TB | \
|
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
|
|
|
|
CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7447: CPU_FTR_COMMON plus USE_TB, MAYBE_CAN_NAP, L2CR,
 * ALTIVEC_COMP, L3CR, SPEC7450, NAP_DISABLE_L2_PR, NEED_COHERENT, PPC_LE
 * and NEED_PAIRED_STWCX -- the same bits that CPU_FTRS_7455 ORs together.
 */
#define CPU_FTRS_7447 (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_USE_TB | \
|
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7447A: the CPU_FTRS_7447 bit set without CPU_FTR_L3CR.
 */
#define CPU_FTRS_7447A (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_USE_TB | \
|
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_7448: the CPU_FTRS_7447A bit set without CPU_FTR_NEED_COHERENT
 * (and, like 7447A, without CPU_FTR_L3CR).
 */
#define CPU_FTRS_7448 (CPU_FTR_COMMON | \
|
2007-05-03 04:34:43 +07:00
|
|
|
CPU_FTR_USE_TB | \
|
|
|
|
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
|
2007-11-10 05:17:49 +07:00
|
|
|
CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_82XX: CPU_FTR_COMMON plus CPU_FTR_MAYBE_CAN_DOZE and
 * CPU_FTR_USE_TB.
 */
#define CPU_FTRS_82XX (CPU_FTR_COMMON | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
|
2007-09-15 03:32:14 +07:00
|
|
|
/*
 * CPU_FTRS_G2_LE: the CPU_FTRS_82XX bit set with CPU_FTR_MAYBE_CAN_NAP
 * added.
 */
#define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_E300: CPU_FTR_COMMON plus MAYBE_CAN_DOZE, USE_TB and
 * MAYBE_CAN_NAP -- the same bits that CPU_FTRS_G2_LE ORs together.
 */
#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_COMMON)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUS which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
/*
 * CPU_FTRS_E300C2: the CPU_FTRS_E300 bit set with CPU_FTR_FPU_UNAVAILABLE
 * added.
 */
#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
|
2006-12-08 15:43:30 +07:00
|
|
|
CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
|
2008-12-19 02:13:32 +07:00
|
|
|
#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
|
[POWERPC] Merge CPU features pertaining to icache coherency
Currently the powerpc kernel has a 64-bit only feature,
COHERENT_ICACHE used for those CPUs which maintain icache/dcache
coherency in hardware (POWER5, essentially). It also has a feature,
SPLIT_ID_CACHE, which is used on CPUs which have separate i and
d-caches, which is to say everything except 601 and Freescale E200.
In nearly all the places we check the SPLIT_ID_CACHE, what we actually
care about is whether the i and d-caches are coherent (which they will
be, trivially, if they're the same cache).
This tries to clarify the situation a little. The COHERENT_ICACHE
feature becomes available on 32-bit and is set for all CPUs where i and
d-cache are effectively coherent, whether this is due to special logic
(POWER5) or because they're unified. We check this, instead of
SPLIT_ID_CACHE nearly everywhere.
The SPLIT_ID_CACHE feature itself is replaced by a UNIFIED_ID_CACHE
feature with reversed sense, set only on 601 and Freescale E200. In
the two places (one Freescale BookE specific) where we really care
whether it's a unified cache, not whether they're coherent, we check
this feature. The CPUs with unified cache are so few, we could
consider replacing this feature bit with explicit checks against the
PVR.
This will make unifying the 32-bit and 64-bit cache flush code a
little more straightforward.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-13 11:52:57 +07:00
|
|
|
#define CPU_FTRS_8XX (CPU_FTR_USE_TB)
|
2008-12-12 13:33:25 +07:00
|
|
|
#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
|
|
|
|
#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
|
2008-12-19 02:13:22 +07:00
|
|
|
#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
|
|
|
|
CPU_FTR_INDEXED_DCR)
|
2010-03-05 17:43:12 +07:00
|
|
|
#define CPU_FTRS_47X (CPU_FTRS_440x6)
|
2007-09-13 13:44:20 +07:00
|
|
|
#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
|
|
|
|
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
|
2011-12-20 22:34:12 +07:00
|
|
|
CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
|
|
|
|
CPU_FTR_DEBUG_LVL_EXC)
|
2008-06-19 04:26:52 +07:00
|
|
|
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
|
2008-12-12 13:33:25 +07:00
|
|
|
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
|
|
|
|
CPU_FTR_NOEXECUTE)
|
2008-06-19 04:26:52 +07:00
|
|
|
#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
|
2008-12-12 13:33:25 +07:00
|
|
|
CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
|
2010-05-28 05:35:12 +07:00
|
|
|
#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
|
2009-02-12 20:54:53 +07:00
|
|
|
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
|
2011-12-20 22:34:47 +07:00
|
|
|
CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
|
2011-04-06 12:11:06 +07:00
|
|
|
#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
|
|
|
|
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
|
2011-04-06 12:18:48 +07:00
|
|
|
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
|
2011-12-20 22:34:47 +07:00
|
|
|
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
|
2011-11-07 00:51:07 +07:00
|
|
|
#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
|
|
|
|
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
|
|
|
|
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
|
2012-04-12 03:27:52 +07:00
|
|
|
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
|
2006-03-23 13:36:59 +07:00
|
|
|
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
|
2006-11-23 06:46:46 +07:00
|
|
|
|
|
|
|
/* 64-bit CPUs */
|
2010-02-10 08:10:25 +07:00
|
|
|
#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_IABR | CPU_FTR_PPC_LE)
|
2010-02-10 08:10:25 +07:00
|
|
|
#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_IABR | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MMCRA | CPU_FTR_CTRL)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
|
2010-08-11 08:40:27 +07:00
|
|
|
CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
|
|
|
|
CPU_FTR_STCX_CHECKS_ADDRESS)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:26:11 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
|
2008-08-22 11:36:19 +07:00
|
|
|
CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:26:11 +07:00
|
|
|
CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
|
|
|
|
CPU_FTR_HVMODE)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_MMCRA | CPU_FTR_SMT | \
|
2011-04-07 02:48:50 +07:00
|
|
|
CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
|
|
|
|
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
|
2006-04-29 06:51:06 +07:00
|
|
|
CPU_FTR_MMCRA | CPU_FTR_SMT | \
|
2011-04-07 02:48:50 +07:00
|
|
|
CPU_FTR_COHERENT_ICACHE | \
|
2006-12-08 13:46:58 +07:00
|
|
|
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
|
2010-08-11 08:40:27 +07:00
|
|
|
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
|
2011-05-02 02:48:20 +07:00
|
|
|
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:26:11 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
|
2008-06-18 07:47:26 +07:00
|
|
|
CPU_FTR_MMCRA | CPU_FTR_SMT | \
|
2011-04-07 02:48:50 +07:00
|
|
|
CPU_FTR_COHERENT_ICACHE | \
|
2008-06-18 07:47:26 +07:00
|
|
|
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
|
2010-08-11 08:40:27 +07:00
|
|
|
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
|
2011-05-03 03:43:04 +07:00
|
|
|
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
|
powerpc: POWER7 optimised copy_to_user/copy_from_user using VMX
Implement a POWER7 optimised copy_to_user/copy_from_user using VMX.
For large aligned copies this new loop is over 10% faster, and for
large unaligned copies it is over 200% faster.
If we take a fault we fall back to the old version, this keeps
things relatively simple and easy to verify.
On POWER7 unaligned stores rarely slow down - they only flush when
a store crosses a 4KB page boundary. Furthermore this flush is
handled completely in hardware and should be 20-30 cycles.
Unaligned loads on the other hand flush much more often - whenever
crossing a 128 byte cache line, or a 32 byte sector if either sector
is an L1 miss.
Considering this information we really want to get the loads aligned
and not worry about the alignment of the stores. Microbenchmarks
confirm that this approach is much faster than the current unaligned
copy loop that uses shifts and rotates to ensure both loads and
stores are aligned.
We also want to try and do the stores in cacheline aligned, cacheline
sized chunks. If the store queue is unable to merge an entire
cacheline of stores then the L2 cache will have to do a
read/modify/write. Even worse, we will serialise this with the stores
in the next iteration of the copy loop since both iterations hit
the same cacheline.
Based on this, the new loop does the following things:
1 - 127 bytes
Get the source 8 byte aligned and use 8 byte loads and stores. Pretty
boring and similar to how the current loop works.
128 - 4095 bytes
Get the source 8 byte aligned and use 8 byte loads and stores,
1 cacheline at a time. We aren't doing the stores in cacheline
aligned chunks so we will potentially serialise once per cacheline.
Even so it is much better than the loop we have today.
4096 - bytes
If both source and destination have the same alignment get them both
16 byte aligned, then get the destination cacheline aligned. Do
cacheline sized loads and stores using VMX.
If source and destination do not have the same alignment, we get the
destination cacheline aligned, and use permute to do aligned loads.
In both cases the VMX loop should be optimal - we always do aligned
loads and stores and are always doing stores in cacheline aligned,
cacheline sized chunks.
To be able to use VMX we must be careful about interrupts and
sleeping. We don't use the VMX loop when in an interrupt (which should
be rare anyway) and we wrap the VMX loop in disable/enable_pagefault
and fall back to the existing copy_tofrom_user loop if we do need to
sleep.
The VMX breakpoint of 4096 bytes was chosen using this microbenchmark:
http://ozlabs.org/~anton/junkcode/copy_to_user.c
Since we are using VMX and there is a cost to saving and restoring
the user VMX state there are two broad cases we need to benchmark:
- Best case - userspace never uses VMX
- Worst case - userspace always uses VMX
In reality a userspace process will sit somewhere between these two
extremes. Since we need to test both aligned and unaligned copies we
end up with 4 combinations. The point at which the VMX loop begins to
win is:
0% VMX
aligned 2048 bytes
unaligned 2048 bytes
100% VMX
aligned 16384 bytes
unaligned 8192 bytes
Considering this is a microbenchmark, the data is hot in cache and
the VMX loop has better store queue merging properties we set the
breakpoint to 4096 bytes, a little below the unaligned breakpoints.
Some future optimisations we can look at:
- Looking at the perf data, a significant part of the cost when a
task is always using VMX is the extra exception we take to restore
the VMX state. As such we should do something similar to the x86
optimisation that restores FPU state for heavy users. ie:
/*
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*/
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
and
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
* saving becomes unlazy to save the trap. This is an unsigned char
* so that after 256 times the counter wraps and the behavior turns
* lazy again; this to deal with bursty apps that only use FPU for
* a short time
*/
- We could create a paca bit to mirror the VMX enabled MSR bit and check
that first, avoiding multiple calls to enable_kernel_altivec.
That should help with iovec based system calls like readv.
- We could have two VMX breakpoints, one for when we know the user VMX
state is loaded into the registers and one when it isn't. This could
be a second bit in the paca so we can calculate the break points quickly.
- One suggestion from Ben was to save and restore the VSX registers
we use inline instead of using enable_kernel_altivec.
[BenH: Fixed a problem with preempt and fixed build without CONFIG_ALTIVEC]
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-12-08 03:11:45 +07:00
|
|
|
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
|
2012-10-31 02:34:15 +07:00
|
|
|
#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
|
|
|
|
CPU_FTR_MMCRA | CPU_FTR_SMT | \
|
|
|
|
CPU_FTR_COHERENT_ICACHE | \
|
|
|
|
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
|
|
|
|
CPU_FTR_DSCR | CPU_FTR_SAO | \
|
|
|
|
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
|
|
|
|
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
2008-12-19 02:13:32 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
|
2006-03-23 13:36:59 +07:00
|
|
|
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
|
2011-04-07 02:48:50 +07:00
|
|
|
CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
|
2008-10-27 07:43:02 +07:00
|
|
|
CPU_FTR_UNALIGNED_LD_STD)
|
2008-07-01 22:16:40 +07:00
|
|
|
#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
2011-04-07 02:48:50 +07:00
|
|
|
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
|
|
|
|
CPU_FTR_PURR | CPU_FTR_REAL_LE)
|
2008-12-19 02:13:32 +07:00
|
|
|
#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
|
2005-09-24 02:08:58 +07:00
|
|
|
|
2011-04-15 05:32:01 +07:00
|
|
|
#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
|
2011-09-29 17:55:13 +07:00
|
|
|
CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | CPU_FTR_ICSWX)
|
2011-04-15 05:32:01 +07:00
|
|
|
|
2005-12-13 03:45:33 +07:00
|
|
|
#ifdef __powerpc64__
|
2011-04-06 12:11:06 +07:00
|
|
|
#ifdef CONFIG_PPC_BOOK3E
|
2011-11-07 00:51:07 +07:00
|
|
|
#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2)
|
2011-04-06 12:11:06 +07:00
|
|
|
#else
|
2006-03-23 13:36:59 +07:00
|
|
|
#define CPU_FTRS_POSSIBLE \
|
|
|
|
(CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \
|
2006-04-29 06:51:06 +07:00
|
|
|
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \
|
2012-10-31 02:34:15 +07:00
|
|
|
CPU_FTRS_POWER7 | CPU_FTRS_POWER8 | CPU_FTRS_CELL | \
|
|
|
|
CPU_FTRS_PA6T | CPU_FTR_VSX)
|
2011-04-06 12:11:06 +07:00
|
|
|
#endif
|
2005-12-13 03:45:33 +07:00
|
|
|
#else
|
2006-03-23 13:36:59 +07:00
|
|
|
enum {
|
|
|
|
CPU_FTRS_POSSIBLE =
|
2005-09-24 02:08:58 +07:00
|
|
|
#if CLASSIC_PPC
|
|
|
|
CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
|
|
|
|
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
|
|
|
|
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
|
|
|
|
CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 |
|
|
|
|
CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 |
|
|
|
|
CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 |
|
|
|
|
CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX |
|
2006-12-08 15:43:30 +07:00
|
|
|
CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 |
|
|
|
|
CPU_FTRS_CLASSIC32 |
|
2005-09-24 02:08:58 +07:00
|
|
|
#else
|
|
|
|
CPU_FTRS_GENERIC_32 |
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_8xx
|
|
|
|
CPU_FTRS_8XX |
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_40x
|
|
|
|
CPU_FTRS_40X |
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_44x
|
2008-12-19 02:13:22 +07:00
|
|
|
CPU_FTRS_44X | CPU_FTRS_440x6 |
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif
|
2010-03-05 17:43:12 +07:00
|
|
|
#ifdef CONFIG_PPC_47x
|
2011-01-26 13:17:58 +07:00
|
|
|
CPU_FTRS_47X | CPU_FTR_476_DD2 |
|
2010-03-05 17:43:12 +07:00
|
|
|
#endif
|
2005-09-24 02:08:58 +07:00
|
|
|
#ifdef CONFIG_E200
|
|
|
|
CPU_FTRS_E200 |
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_E500
|
2011-12-20 22:34:14 +07:00
|
|
|
CPU_FTRS_E500 | CPU_FTRS_E500_2 |
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_PPC_E500MC
|
|
|
|
CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 |
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif
|
|
|
|
0,
|
2006-03-23 13:36:59 +07:00
|
|
|
};
|
|
|
|
#endif /* __powerpc64__ */
|
2005-09-24 02:08:58 +07:00
|
|
|
|
2005-12-13 03:45:33 +07:00
|
|
|
#ifdef __powerpc64__
|
2011-04-06 12:11:06 +07:00
|
|
|
#ifdef CONFIG_PPC_BOOK3E
|
2011-11-07 00:51:07 +07:00
|
|
|
#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2)
|
2011-04-06 12:11:06 +07:00
|
|
|
#else
|
2006-03-23 13:36:59 +07:00
|
|
|
/*
 * Feature bits shared by every 64-bit (non-Book3E) CPU type: the bitwise
 * AND of each per-CPU mask that is OR-ed into CPU_FTRS_POSSIBLE.
 * cpu_has_feature() reports these bits as present without looking at
 * cur_cpu_spec, so every CPU listed in CPU_FTRS_POSSIBLE must also be
 * listed here (CPU_FTRS_POWER8 was previously missing).
 */
#define CPU_FTRS_ALWAYS		\
	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
	    CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &	\
	    CPU_FTRS_POWER7 & CPU_FTRS_POWER8 & CPU_FTRS_CELL &		\
	    CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
|
2011-04-06 12:11:06 +07:00
|
|
|
#endif
|
2005-12-13 03:45:33 +07:00
|
|
|
#else
|
2006-03-23 13:36:59 +07:00
|
|
|
enum {
|
|
|
|
CPU_FTRS_ALWAYS =
|
2005-09-24 02:08:58 +07:00
|
|
|
#if CLASSIC_PPC
|
|
|
|
CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
|
|
|
|
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
|
|
|
|
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
|
|
|
|
CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &
|
|
|
|
CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 &
|
|
|
|
CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 &
|
|
|
|
CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX &
|
2006-12-08 15:43:30 +07:00
|
|
|
CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 &
|
|
|
|
CPU_FTRS_CLASSIC32 &
|
2005-09-24 02:08:58 +07:00
|
|
|
#else
|
|
|
|
CPU_FTRS_GENERIC_32 &
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_8xx
|
|
|
|
CPU_FTRS_8XX &
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_40x
|
|
|
|
CPU_FTRS_40X &
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_44x
|
2008-12-19 02:13:22 +07:00
|
|
|
CPU_FTRS_44X & CPU_FTRS_440x6 &
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_E200
|
|
|
|
CPU_FTRS_E200 &
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_E500
|
2011-12-20 22:34:14 +07:00
|
|
|
CPU_FTRS_E500 & CPU_FTRS_E500_2 &
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_PPC_E500MC
|
|
|
|
CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 &
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif
|
2011-12-20 22:34:47 +07:00
|
|
|
~CPU_FTR_EMB_HV & /* can be removed at runtime */
|
2005-09-24 02:08:58 +07:00
|
|
|
CPU_FTRS_POSSIBLE,
|
|
|
|
};
|
2006-03-23 13:36:59 +07:00
|
|
|
#endif /* __powerpc64__ */
|
2005-09-24 02:08:58 +07:00
|
|
|
|
|
|
|
static inline int cpu_has_feature(unsigned long feature)
|
|
|
|
{
|
|
|
|
return (CPU_FTRS_ALWAYS & feature) ||
|
|
|
|
(CPU_FTRS_POSSIBLE
|
|
|
|
& cur_cpu_spec->cpu_features
|
|
|
|
& feature);
|
|
|
|
}
|
|
|
|
|
2010-06-15 13:05:19 +07:00
|
|
|
#define HBP_NUM 1
|
|
|
|
|
2005-09-24 02:08:58 +07:00
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
|
|
|
|
#endif /* __ASM_POWERPC_CPUTABLE_H */
|