linux_dsm_epyc7002/arch/tile/include/asm/irqflags.h
Christoph Lameter b4f501916c tile: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x).  This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.

Other use cases are for storing and retrieving data from the current
processors percpu area.  __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.

__get_cpu_var() is defined as :

#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.

This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset.  Thereby address calculations are avoided and less registers
are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.

The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e.  using a global
register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

	DEFINE_PER_CPU(int, y);
	int *x = &__get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

	DEFINE_PER_CPU(int, y[20]);
	int *x = __get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processors instance of a per cpu
variable.

	DEFINE_PER_CPU(int, y);
	int x = __get_cpu_var(y)

   Converts to

	int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

	DEFINE_PER_CPU(struct mystruct, y);
	struct mystruct x = __get_cpu_var(y);

   Converts to

	memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

	DEFINE_PER_CPU(int, y)
	__get_cpu_var(y) = x;

   Converts to

	__this_cpu_write(y, x);

6. Increment/Decrement etc of a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y)++

   Converts to

	__this_cpu_inc(y)

Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-26 13:45:54 -04:00

312 lines
10 KiB
C

/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _ASM_TILE_IRQFLAGS_H
#define _ASM_TILE_IRQFLAGS_H
#include <arch/interrupts.h>
#include <arch/chip.h>
/*
* The set of interrupts we want to allow when interrupts are nominally
* disabled. The remainder are effectively "NMI" interrupts from
* the point of view of the generic Linux code. Note that synchronous
* interrupts (aka "non-queued") are not blocked by the mask in any case.
*/
#define LINUX_MASKABLE_INTERRUPTS \
(~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT)))
#if CHIP_HAS_SPLIT_INTR_MASK()
/* The same macro, but for the two 32-bit SPRs separately. */
#define LINUX_MASKABLE_INTERRUPTS_LO (-1)
#define LINUX_MASKABLE_INTERRUPTS_HI \
(~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32))))
#endif
#ifndef __ASSEMBLY__
/* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */
#include <asm/percpu.h>
#include <arch/spr_def.h>
/*
* Set and clear kernel interrupt masks.
*
* NOTE: __insn_mtspr() is a compiler builtin marked as a memory
* clobber. We rely on it being equivalent to a compiler barrier in
* this code since arch_local_irq_save() and friends must act as
* compiler barriers. This compiler semantic is baked into enough
* places that the compiler will maintain it going forward.
*/
#if CHIP_HAS_SPLIT_INTR_MASK()
#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32
# error Fix assumptions about which word various interrupts are in
#endif
#define interrupt_mask_set(n) do { \
int __n = (n); \
int __mask = 1 << (__n & 0x1f); \
if (__n < 32) \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, __mask); \
else \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, __mask); \
} while (0)
#define interrupt_mask_reset(n) do { \
int __n = (n); \
int __mask = 1 << (__n & 0x1f); \
if (__n < 32) \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, __mask); \
else \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, __mask); \
} while (0)
#define interrupt_mask_check(n) ({ \
int __n = (n); \
(((__n < 32) ? \
__insn_mfspr(SPR_INTERRUPT_MASK_K_0) : \
__insn_mfspr(SPR_INTERRUPT_MASK_K_1)) \
>> (__n & 0x1f)) & 1; \
})
#define interrupt_mask_set_mask(mask) do { \
unsigned long long __m = (mask); \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, (unsigned long)(__m)); \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, (unsigned long)(__m>>32)); \
} while (0)
#define interrupt_mask_reset_mask(mask) do { \
unsigned long long __m = (mask); \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
} while (0)
#define interrupt_mask_save_mask() \
(__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_0) | \
(((unsigned long long)__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_1))<<32))
#define interrupt_mask_restore_mask(mask) do { \
unsigned long long __m = (mask); \
__insn_mtspr(SPR_INTERRUPT_MASK_K_0, (unsigned long)(__m)); \
__insn_mtspr(SPR_INTERRUPT_MASK_K_1, (unsigned long)(__m>>32)); \
} while (0)
#else
#define interrupt_mask_set(n) \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
#define interrupt_mask_reset(n) \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (1UL << (n)))
#define interrupt_mask_check(n) \
((__insn_mfspr(SPR_INTERRUPT_MASK_K) >> (n)) & 1)
#define interrupt_mask_set_mask(mask) \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
#define interrupt_mask_reset_mask(mask) \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
#define interrupt_mask_save_mask() \
__insn_mfspr(SPR_INTERRUPT_MASK_K)
#define interrupt_mask_restore_mask(mask) \
__insn_mtspr(SPR_INTERRUPT_MASK_K, (mask))
#endif
/*
* The set of interrupts we want active if irqs are enabled.
* Note that in particular, the tile timer interrupt comes and goes
* from this set, since we have no other way to turn off the timer.
* Likewise, INTCTRL_K is removed and re-added during device
* interrupts, as is the the hardwall UDN_FIREWALL interrupt.
* We use a low bit (MEM_ERROR) as our sentinel value and make sure it
* is always claimed as an "active interrupt" so we can query that bit
* to know our current state.
*/
DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR)
#ifdef CONFIG_DEBUG_PREEMPT
/* Due to inclusion issues, we can't rely on <linux/smp.h> here. */
extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
#endif
/* Disable interrupts. */
#define arch_local_irq_disable() \
interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS)
/* Disable all interrupts, including NMIs. */
#define arch_local_irq_disable_all() \
interrupt_mask_set_mask(-1ULL)
/*
* Read the set of maskable interrupts.
* We avoid the preemption warning here via raw_cpu_ptr since even
* if irqs are already enabled, it's harmless to read the wrong cpu's
* enabled mask.
*/
#define arch_local_irqs_enabled() \
(*raw_cpu_ptr(&interrupts_enabled_mask))
/* Re-enable all maskable interrupts. */
#define arch_local_irq_enable() \
interrupt_mask_reset_mask(arch_local_irqs_enabled())
/* Disable or enable interrupts based on flag argument. */
#define arch_local_irq_restore(disabled) do { \
if (disabled) \
arch_local_irq_disable(); \
else \
arch_local_irq_enable(); \
} while (0)
/* Return true if "flags" argument means interrupts are disabled. */
#define arch_irqs_disabled_flags(flags) ((flags) != 0)
/* Return true if interrupts are currently disabled. */
#define arch_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR)
/* Save whether interrupts are currently disabled. */
#define arch_local_save_flags() arch_irqs_disabled()
/* Save whether interrupts are currently disabled, then disable them. */
#define arch_local_irq_save() ({ \
unsigned long __flags = arch_local_save_flags(); \
arch_local_irq_disable(); \
__flags; })
/* Prevent the given interrupt from being enabled next time we enable irqs. */
#define arch_local_irq_mask(interrupt) \
this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt)))
/* Prevent the given interrupt from being enabled immediately. */
#define arch_local_irq_mask_now(interrupt) do { \
arch_local_irq_mask(interrupt); \
interrupt_mask_set(interrupt); \
} while (0)
/* Allow the given interrupt to be enabled next time we enable irqs. */
#define arch_local_irq_unmask(interrupt) \
this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt)))
/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
#define arch_local_irq_unmask_now(interrupt) do { \
arch_local_irq_unmask(interrupt); \
if (!irqs_disabled()) \
interrupt_mask_reset(interrupt); \
} while (0)
#else /* __ASSEMBLY__ */
/* We provide a somewhat more restricted set for assembly. */
#ifdef __tilegx__
#if INT_MEM_ERROR != 0
# error Fix IRQS_DISABLED() macro
#endif
/* Return 0 or 1 to indicate whether interrupts are currently disabled. */
#define IRQS_DISABLED(tmp) \
mfspr tmp, SPR_INTERRUPT_MASK_K; \
andi tmp, tmp, 1
/* Load up a pointer to &interrupts_enabled_mask. */
#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \
moveli reg, hw2_last(interrupts_enabled_mask); \
shl16insli reg, reg, hw1(interrupts_enabled_mask); \
shl16insli reg, reg, hw0(interrupts_enabled_mask); \
add reg, reg, tp
/* Disable interrupts. */
#define IRQ_DISABLE(tmp0, tmp1) \
moveli tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS); \
shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS); \
shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS); \
mtspr SPR_INTERRUPT_MASK_SET_K, tmp0
/* Disable ALL synchronous interrupts (used by NMI entry). */
#define IRQ_DISABLE_ALL(tmp) \
movei tmp, -1; \
mtspr SPR_INTERRUPT_MASK_SET_K, tmp
/* Enable interrupts. */
#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
ld tmp0, tmp0
#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0
#else /* !__tilegx__ */
/*
* Return 0 or 1 to indicate whether interrupts are currently disabled.
* Note that it's important that we use a bit from the "low" mask word,
* since when we are enabling, that is the word we write first, so if we
* are interrupted after only writing half of the mask, the interrupt
* handler will correctly observe that we have interrupts enabled, and
* will enable interrupts itself on return from the interrupt handler
* (making the original code's write of the "high" mask word idempotent).
*/
#define IRQS_DISABLED(tmp) \
mfspr tmp, SPR_INTERRUPT_MASK_K_0; \
shri tmp, tmp, INT_MEM_ERROR; \
andi tmp, tmp, 1
/* Load up a pointer to &interrupts_enabled_mask. */
#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \
moveli reg, lo16(interrupts_enabled_mask); \
auli reg, reg, ha16(interrupts_enabled_mask); \
add reg, reg, tp
/* Disable interrupts. */
#define IRQ_DISABLE(tmp0, tmp1) \
{ \
movei tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \
moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \
}; \
{ \
mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp0; \
auli tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI) \
}; \
mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp1
/* Disable ALL synchronous interrupts (used by NMI entry). */
#define IRQ_DISABLE_ALL(tmp) \
movei tmp, -1; \
mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp; \
mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp
/* Enable interrupts. */
#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
{ \
lw tmp0, tmp0; \
addi tmp1, tmp0, 4 \
}; \
lw tmp1, tmp1
#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \
mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1
#endif
#define IRQ_ENABLE(tmp0, tmp1) \
IRQ_ENABLE_LOAD(tmp0, tmp1); \
IRQ_ENABLE_APPLY(tmp0, tmp1)
/*
* Do the CPU's IRQ-state tracing from assembly code. We call a
* C function, but almost everywhere we do, we don't mind clobbering
* all the caller-saved registers.
*/
#ifdef CONFIG_TRACE_IRQFLAGS
# define TRACE_IRQS_ON jal trace_hardirqs_on
# define TRACE_IRQS_OFF jal trace_hardirqs_off
#else
# define TRACE_IRQS_ON
# define TRACE_IRQS_OFF
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_TILE_IRQFLAGS_H */