linux_dsm_epyc7002/arch/arc/include/asm/delay.h
Vineet Gupta 36425cd670 ARC: udelay: fix inline assembler by adding LP_COUNT to clobber list
commit 3c7c7a2fc8 ("ARC: Don't use "+l" inline asm constraint")
modified the inline assembly to setup LP_COUNT register manually and NOT
rely on gcc to do it (with the +l inline assembler contraint hint, now
being retired in the compiler)

However the fix was flawed as we didn't add LP_COUNT to asm clobber list,
meaning gcc doesn't know that LP_COUNT or zero-delay-loops are in action
in the inline asm.

This resulted in some fun - as nested ZOL loops were being generared

| mov lp_count,250000 ;16 # tmp235,
| lp .L__GCC__LP14 #		<======= OUTER LOOP (gcc generated)
|   .L14:
|   ld r2, [r5] # MEM[(volatile u32 *)prephitmp_43], w
|   dmb 1
|   breq r2, -1, @.L21 #, w,,
|   bbit0 r2,1,@.L13 # w,,
|   ld r4,[r7] ;25 # loops_per_jiffy, loops_per_jiffy
|   mpymu r3,r4,r6 #, loops_per_jiffy, tmp234
|
|   mov lp_count, r3 #		 <====== INNER LOOP (from inline asm)
|   lp 1f
| 	 nop
|   1:
|   nop_s
| .L__GCC__LP14: ; loop end, start is @.L14 #,

This caused issues with drivers relying on sane behaviour of udelay
friends.

With LP_COUNT added to clobber list, gcc doesn't generate the outer
loop in say above case.

Addresses STAR 9001146134

Reported-by: Joao Pinto <jpinto@synopsys.com>
Fixes: 3c7c7a2fc8 ("ARC: Don't use "+l" inline asm constraint")
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
2017-01-24 10:54:24 -08:00

70 lines
1.9 KiB
C

/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Delay routines using pre computed loops_per_jiffy value.
*
* vineetg: Feb 2012
* -Rewrote in "C" to avoid dealing with availability of H/w MPY
* -Also reduced the num of MPY operations from 3 to 2
*
* Amit Bhor: Codito Technologies 2004
*/
#ifndef __ASM_ARC_UDELAY_H
#define __ASM_ARC_UDELAY_H
#include <asm/param.h> /* HZ */
static inline void __delay(unsigned long loops)
{
__asm__ __volatile__(
" mov lp_count, %0 \n"
" lp 1f \n"
" nop \n"
"1: \n"
:
: "r"(loops)
: "lp_count");
}
extern void __bad_udelay(void);
/*
* Normal Math for computing loops in "N" usecs
* -we have precomputed @loops_per_jiffy
* -1 sec has HZ jiffies
* loops per "N" usecs = ((loops_per_jiffy * HZ / 1000000) * N)
*
* Approximate Division by multiplication:
* -Mathematically if we multiply and divide a number by same value the
* result remains unchanged: In this case, we use 2^32
* -> (loops_per_N_usec * 2^32 ) / 2^32
* -> (((loops_per_jiffy * HZ / 1000000) * N) * 2^32) / 2^32
* -> (loops_per_jiffy * HZ * N * 4295) / 2^32
*
* -Divide by 2^32 is very simply right shift by 32
* -We simply need to ensure that the multiply per above eqn happens in
* 64-bit precision (if CPU doesn't support it - gcc can emaulate it)
*/
static inline void __udelay(unsigned long usecs)
{
unsigned long loops;
/* (u64) cast ensures 64 bit MPY - real or emulated
* HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops
*/
loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;
__delay(loops);
}
#define udelay(n) (__builtin_constant_p(n) ? ((n) > 20000 ? __bad_udelay() \
: __udelay(n)) : __udelay(n))
#endif /* __ASM_ARC_UDELAY_H */