mirror of https://github.com/AuxXxilium/linux_dsm_epyc7002.git (synced 2024-12-20 22:36:54 +07:00)
36425cd670
commit 3c7c7a2fc8 ("ARC: Don't use "+l" inline asm constraint") modified
the inline assembly to set up the LP_COUNT register manually and NOT rely
on gcc to do it (with the "+l" inline assembler constraint hint, now being
retired in the compiler).

However the fix was flawed as we didn't add LP_COUNT to the asm clobber
list, meaning gcc doesn't know that LP_COUNT or zero-delay-loops are in
action in the inline asm. This resulted in some fun, as nested ZOL loops
were being generated:

|	mov lp_count,250000	;16	# tmp235,
|	lp .L__GCC__LP14	# <======= OUTER LOOP (gcc generated)
| .L14:
|	ld r2, [r5]	# MEM[(volatile u32 *)prephitmp_43], w
|	dmb 1
|	breq r2, -1, @.L21	#, w,,
|	bbit0 r2,1,@.L13	# w,,
|	ld r4,[r7]	;25	# loops_per_jiffy, loops_per_jiffy
|	mpymu r3,r4,r6	#, loops_per_jiffy, tmp234
|
|	mov lp_count, r3	# <====== INNER LOOP (from inline asm)
|	lp 1f
|	nop
| 1:
|	nop_s
| .L__GCC__LP14:	; loop end, start is @.L14	#,

This caused issues with drivers relying on sane behaviour of udelay and
friends.

With LP_COUNT added to the clobber list, gcc no longer generates the outer
loop in cases such as the above.

Addresses STAR 9001146134

Reported-by: Joao Pinto <jpinto@synopsys.com>
Fixes: 3c7c7a2fc8 ("ARC: Don't use "+l" inline asm constraint")
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
70 lines
1.9 KiB
C
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Delay routines using pre computed loops_per_jiffy value.
 *
 * vineetg: Feb 2012
 * -Rewrote in "C" to avoid dealing with availability of H/w MPY
 * -Also reduced the num of MPY operations from 3 to 2
 *
 * Amit Bhor: Codito Technologies 2004
 */

#ifndef __ASM_ARC_UDELAY_H
#define __ASM_ARC_UDELAY_H

#include <asm/param.h>		/* HZ */

static inline void __delay(unsigned long loops)
{
	__asm__ __volatile__(
	"	mov lp_count, %0	\n"
	"	lp  1f			\n"
	"	nop			\n"
	"1:				\n"
	:
	: "r"(loops)
	: "lp_count");
}

extern void __bad_udelay(void);

/*
 * Normal Math for computing loops in "N" usecs
 *  -we have precomputed @loops_per_jiffy
 *  -1 sec has HZ jiffies
 * loops per "N" usecs = ((loops_per_jiffy * HZ / 1000000) * N)
 *
 * Approximate Division by multiplication:
 *  -Mathematically if we multiply and divide a number by same value the
 *   result remains unchanged: In this case, we use 2^32
 *  -> (loops_per_N_usec * 2^32) / 2^32
 *  -> (((loops_per_jiffy * HZ / 1000000) * N) * 2^32) / 2^32
 *  -> (loops_per_jiffy * HZ * N * 4295) / 2^32
 *
 *  -Divide by 2^32 is very simply right shift by 32
 *  -We simply need to ensure that the multiply per above eqn happens in
 *   64-bit precision (if CPU doesn't support it - gcc can emulate it)
 */

static inline void __udelay(unsigned long usecs)
{
	unsigned long loops;

	/* (u64) cast ensures 64 bit MPY - real or emulated
	 * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops
	 */
	loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;

	__delay(loops);
}

#define udelay(n) (__builtin_constant_p(n) ? ((n) > 20000 ? __bad_udelay() \
				: __udelay(n)) : __udelay(n))

#endif	/* __ASM_ARC_UDELAY_H */
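As a quick sanity check on the "Approximate Division by multiplication" comment in __udelay() above: 2^32 / 10^6 = 4294.967296, which the code rounds up to 4295 (erring toward a slightly longer delay), so multiplying by 4295 * HZ and shifting right by 32 approximates multiplying by HZ / 10^6. The sketch below reproduces the same arithmetic in user space, for illustration only; HZ and loops_per_jiffy are made-up stand-in values (the kernel supplies the real ones), and usecs_to_loops() is just a hypothetical name for the expression inside __udelay():

#include <stdint.h>
#include <stdio.h>

#define HZ 100					/* assumed tick rate, illustration only */
static unsigned long loops_per_jiffy = 5000000;	/* assumed calibration value */

/* Same fixed-point math as __udelay(): loops ~= usecs * loops_per_jiffy * HZ / 10^6 */
static unsigned long usecs_to_loops(unsigned long usecs)
{
	return ((uint64_t)usecs * 4295 * HZ * loops_per_jiffy) >> 32;
}

int main(void)
{
	/* exact answer: 100 * 5000000 * 100 / 1000000 = 50000 loops */
	printf("udelay(100) -> %lu loops\n", usecs_to_loops(100));
	return 0;
}

Note also the udelay() wrapper: for a compile-time constant delay greater than 20000 usecs it expands to a call to __bad_udelay(), which is declared but deliberately left undefined, so an over-long constant delay is caught as a link-time error rather than silently misbehaving.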