linux_dsm_epyc7002/arch/arc/include/asm/cache.h
Vineet Gupta f734a31083 ARCv2: mm: micro-optimize region flush generated code
DC_CTRL.RGN_OP is 3 bits wide, however only 1 bit is used in current
programming model (0: flush, 1: invalidate)

The current code targetting 3 bits leads to additional 8 byte AND
operation which can be elided given that only 1 bit is ever set by
software and/or looked at by hardware

before
------

| 80b63324 <__dma_cache_wback_inv_l1>:
| 80b63324:	clri	r3
| 80b63328:	lr	r2,[dc_ctrl]
| 80b6332c:	and	r2,r2,0xfffff1ff	<--- 8 bytes insn
| 80b63334:	or	r2,r2,576
| 80b63338:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b63360 <__dma_cache_inv_l1>:
| 80b63360:	clri	r3
| 80b63364:	lr	r2,[dc_ctrl]
| 80b63368:	and	r2,r2,0xfffff1ff	<--- 8 bytes insn
| 80b63370:	bset_s	r2,r2,0x9
| 80b63372:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b6338c <__dma_cache_wback_l1>:
| 80b6338c:	clri	r3
| 80b63390:	lr	r2,[dc_ctrl]
| 80b63394:	and	r2,r2,0xfffff1ff	<--- 8 bytes insn
| 80b6339c:	sr	r2,[dc_ctrl]

after (AND elided totally in 2 cases, replaced with 2 byte BCLR in 3rd)
-----

| 80b63324 <__dma_cache_wback_inv_l1>:
| 80b63324:	clri	r3
| 80b63328:	lr	r2,[dc_ctrl]
| 80b6332c:	or	r2,r2,576
| 80b63330:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b63358 <__dma_cache_inv_l1>:
| 80b63358:	clri	r3
| 80b6335c:	lr	r2,[dc_ctrl]
| 80b63360:	bset_s	r2,r2,0x9
| 80b63362:	sr	r2,[dc_ctrl]
| ...
| ...
| 80b6337c <__dma_cache_wback_l1>:
| 80b6337c:	clri	r3
| 80b63380:	lr	r2,[dc_ctrl]
| 80b63384:	bclr_s	r2,r2,0x9
| 80b63386:	sr	r2,[dc_ctrl]

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
2017-05-02 16:40:29 -07:00

114 lines
3.0 KiB
C

/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __ARC_ASM_CACHE_H
#define __ARC_ASM_CACHE_H
/* In case $$ not config, setup a dummy number for rest of kernel */
#ifndef CONFIG_ARC_CACHE_LINE_SHIFT
#define L1_CACHE_SHIFT 6
#else
#define L1_CACHE_SHIFT CONFIG_ARC_CACHE_LINE_SHIFT
#endif
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1))
/*
* ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF)
* Ideal for wiring memory mapped peripherals as we don't need to do
* explicit uncached accesses (LD.di/ST.di) hence more portable drivers
*/
#define ARC_UNCACHED_ADDR_SPACE 0xc0000000
#ifndef __ASSEMBLY__
/* Uncached access macros */
#define arc_read_uncached_32(ptr) \
({ \
unsigned int __ret; \
__asm__ __volatile__( \
" ld.di %0, [%1] \n" \
: "=r"(__ret) \
: "r"(ptr)); \
__ret; \
})
#define arc_write_uncached_32(ptr, data)\
({ \
__asm__ __volatile__( \
" st.di %0, [%1] \n" \
: \
: "r"(data), "r"(ptr)); \
})
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
#endif /* !__ASSEMBLY__ */
/* Instruction cache related Auxiliary registers */
#define ARC_REG_IC_BCR 0x77 /* Build Config reg */
#define ARC_REG_IC_IVIC 0x10
#define ARC_REG_IC_CTRL 0x11
#define ARC_REG_IC_IVIR 0x16
#define ARC_REG_IC_ENDR 0x17
#define ARC_REG_IC_IVIL 0x19
#define ARC_REG_IC_PTAG 0x1E
#define ARC_REG_IC_PTAG_HI 0x1F
/* Bit val in IC_CTRL */
#define IC_CTRL_DIS 0x1
/* Data cache related Auxiliary registers */
#define ARC_REG_DC_BCR 0x72 /* Build Config reg */
#define ARC_REG_DC_IVDC 0x47
#define ARC_REG_DC_CTRL 0x48
#define ARC_REG_DC_IVDL 0x4A
#define ARC_REG_DC_FLSH 0x4B
#define ARC_REG_DC_FLDL 0x4C
#define ARC_REG_DC_STARTR 0x4D
#define ARC_REG_DC_ENDR 0x4E
#define ARC_REG_DC_PTAG 0x5C
#define ARC_REG_DC_PTAG_HI 0x5F
/* Bit val in DC_CTRL */
#define DC_CTRL_DIS 0x001
#define DC_CTRL_INV_MODE_FLUSH 0x040
#define DC_CTRL_FLUSH_STATUS 0x100
#define DC_CTRL_RGN_OP_INV 0x200
#define DC_CTRL_RGN_OP_MSK 0x200
/*System-level cache (L2 cache) related Auxiliary registers */
#define ARC_REG_SLC_CFG 0x901
#define ARC_REG_SLC_CTRL 0x903
#define ARC_REG_SLC_FLUSH 0x904
#define ARC_REG_SLC_INVALIDATE 0x905
#define ARC_REG_SLC_RGN_START 0x914
#define ARC_REG_SLC_RGN_END 0x916
/* Bit val in SLC_CONTROL */
#define SLC_CTRL_DIS 0x001
#define SLC_CTRL_IM 0x040
#define SLC_CTRL_BUSY 0x100
#define SLC_CTRL_RGN_OP_INV 0x200
/* IO coherency related Auxiliary registers */
#define ARC_REG_IO_COH_ENABLE 0x500
#define ARC_REG_IO_COH_PARTIAL 0x501
#define ARC_REG_IO_COH_AP0_BASE 0x508
#define ARC_REG_IO_COH_AP0_SIZE 0x509
#endif /* _ASM_CACHE_H */