linux_dsm_epyc7002/arch/s390/lib/div64.c

/*
 *  __div64_32 implementation for 31 bit.
 *
 *    Copyright IBM Corp. 2006
 *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
 */

#include <linux/types.h>
#include <linux/module.h>

#ifdef CONFIG_MARCH_G5

/*
 * Function to divide an unsigned 64 bit integer by an unsigned
 * 31 bit integer using signed 64/32 bit division.
 *
 * n:    in/out, points to the 64 bit dividend; it is overwritten with
 *       the 64 bit quotient (left untouched when base == 1).
 * base: the divisor. It has to fit into 31 bits; the caller in this
 *       file passes base/2 whenever bit 31 of its own divisor is set.
 *
 * Returns the 32 bit remainder.
 *
 * The quotient is built by long division on the two 32 bit words of *n:
 * words[0] is handled as the upper half and words[1] as the lower half.
 */
static uint32_t __div64_31(uint64_t *n, uint32_t base)
{
	/*
	 * reg2 and reg3 are pinned to registers 2 and 3. The divide
	 * instructions below treat reg2:reg3 as one 64 bit dividend and
	 * leave the remainder in reg2 and the quotient in reg3.
	 */
	register uint32_t reg2 asm("2");
	register uint32_t reg3 asm("3");
	uint32_t *words = (uint32_t *) n;
	uint32_t tmp;

	/* Special case base==1, remainder = 0, quotient = n */
	if (base == 1)
		return 0;
	/*
	 * Special case base==0 will cause a fixed point divide exception
	 * on the dr instruction and may not happen anyway. For the
	 * following calculation we can assume base > 1. The first
	 * signed 64 / 32 bit division with an upper half of 0 will
	 * give the correct upper half of the 64 bit quotient.
	 * (With base > 1 the quotient words[0] / base is below 2^31, so
	 * it fits into the signed 32 bit result of dr.)
	 */
	reg2 = 0UL;
	reg3 = words[0];
	asm volatile(
		"	dr	%0,%2\n"
		: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );
	words[0] = reg3;
	/*
	 * reg2 keeps the remainder of the upper half division; together
	 * with the lower word it forms the dividend of the second step.
	 */
	reg3 = words[1];
	/*
	 * To get the lower half of the 64 bit quotient and the 32 bit
	 * remainder we have to use a little trick. Since we only have
	 * a signed division the quotient can get too big. To avoid this
	 * the 64 bit dividend is halved, then the signed division will
	 * work. Afterwards the quotient and the remainder are doubled.
	 * If the last bit of the dividend has been one the remainder
	 * is increased by one then checked against the base. If the
	 * remainder has overflowed subtract base and increase the
	 * quotient. Simple, no ?
	 *
	 * Operands: %0 = reg2, %1 = reg3, %2 = tmp, %3 = base. tmp is
	 * preloaded with 1 through the matching constraint "2" (1UL).
	 *
	 *   nr   %2,%1   tmp = 1 & reg3, saves the last bit of the dividend
	 *   srdl %0,1    halve the 64 bit dividend reg2:reg3
	 *   dr   %0,%3   signed divide by base
	 *   alr  %0,%0   double the remainder
	 *   alr  %1,%1   double the quotient
	 *   alr  %0,%2   add the saved last bit to the remainder
	 *   clr  %0,%3   remainder < base ?
	 *   jl   0f      yes: no correction needed
	 *   slr  %0,%3   no: subtract base from the remainder ...
	 *   ahi  %1,1    ... and increase the quotient by one
	 *
	 * Since reg2 < base on entry, the halved dividend is below
	 * base * 2^31 and the quotient of dr stays below 2^31. The
	 * remainder of dr is below base < 2^31, so doubling it and
	 * adding one cannot wrap around 32 bits.
	 */
	asm volatile(
		"	nr	%2,%1\n"
		"	srdl	%0,1\n"
		"	dr	%0,%3\n"
		"	alr	%0,%0\n"
		"	alr	%1,%1\n"
		"	alr	%0,%2\n"
		"	clr	%0,%3\n"
		"	jl	0f\n"
		"	slr	%0,%3\n"
		"	ahi	%1,1\n"
		"0:\n"
		: "+d" (reg2), "+d" (reg3), "=d" (tmp)
		: "d" (base), "2" (1UL) : "cc" );
	words[1] = reg3;
	return reg2;
}

/*
 * Divide the unsigned 64 bit integer *n by the unsigned 32 bit integer
 * base. The quotient replaces *n, the remainder is returned.
 *
 * The work is done by __div64_31, which only accepts divisors that fit
 * into 31 bits. A divisor with bit 31 set is therefore halved first and
 * the result is corrected afterwards.
 */
uint32_t __div64_32(uint64_t *n, uint32_t base)
{
	const int wide = (signed) base < 0;	/* bit 31 of base set ? */
	uint32_t half, rem;
	uint64_t quot;
	int64_t diff;

	/* base < 2^31: the 64/31 bit division can be used as is. */
	if (!wide)
		return __div64_31(n, base);

	/*
	 * Approximate the result with the halved divisor:
	 * n = half*quot + rem, with rem < half.
	 */
	half = base / 2;
	rem = __div64_31(n, half);
	quot = *n;

	/*
	 * Step 1: go from divisor half to divisor 2*half.
	 * n = (2*half)*(quot/2) + ((quot&1) ? half : 0) + rem
	 * An odd quot leaves one spare "half" that moves into the
	 * remainder; rem + half < 2*half <= base, so no overflow.
	 */
	if (quot & 1)
		rem += half;
	quot >>= 1;

	/*
	 * Step 2: 2*half equals base only for an even base. For an odd
	 * base 2*half = base - 1, hence
	 * n = base*quot - quot + rem
	 * and quot has to be taken off the remainder.
	 */
	if (base & 1) {
		diff = rem - quot;
		/*
		 * A negative difference is repaired by borrowing from the
		 * quotient, one base per step. quot is below 2^33 here and
		 * base is at least 2^31, so the loop ends after no more
		 * than four steps.
		 */
		for (; diff < 0; quot--)
			diff += base;
		rem = diff;
	}
	*n = quot;
	return rem;
}

#else /* MARCH_G5 */

/*
 * Divide the unsigned 64 bit integer *n by the unsigned 32 bit integer
 * base; variant used when CONFIG_MARCH_G5 is not set.
 *
 * n:    in/out, points to the 64 bit dividend; it is overwritten with
 *       the 64 bit quotient.
 * base: the 32 bit divisor.
 *
 * Returns the 32 bit remainder.
 *
 * The division is a two step long division with the dlr (divide
 * logical, i.e. unsigned) instruction. words[0] is handled as the upper
 * half of *n and words[1] as the lower half.
 */
uint32_t __div64_32(uint64_t *n, uint32_t base)
{
	/*
	 * reg2 and reg3 are pinned to registers 2 and 3. dlr takes
	 * reg2:reg3 as the 64 bit dividend and leaves the remainder in
	 * reg2 and the quotient in reg3.
	 */
	register uint32_t reg2 asm("2");
	register uint32_t reg3 asm("3");
	uint32_t *words = (uint32_t *) n;

	/* Step 1: upper word / base gives the upper half of the quotient. */
	reg2 = 0UL;
	reg3 = words[0];
	asm volatile(
		"	dlr	%0,%2\n"
		: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );
	words[0] = reg3;
	/*
	 * Step 2: the remainder of step 1 stays in reg2 as the upper half
	 * of the next dividend. It is smaller than base, so the quotient
	 * of this division fits into 32 bits.
	 */
	reg3 = words[1];
	asm volatile(
		"	dlr	%0,%2\n"
		: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );
	words[1] = reg3;
	/* reg2 now holds the remainder of the complete 64 bit division. */
	return reg2;
}

#endif /* MARCH_G5 */
[S390] __div64_32 for 31 bit. The clocksource infrastructure introduced with commit ad596171ed635c51a9eef829187af100cbf8dcf7 broke 31 bit s390. The reason is that the do_div() primitive for 31 bit always had a restriction: it could only divide an unsigned 64 bit integer by an unsigned 31 bit integer. The clocksource code now uses do_div() with a base value that has the most significant bit set. The result is that clock->cycle_interval has a funny value which causes the linux time to jump around like mad. The solution is "obvious": implement a proper __div64_32 function for 31 bit s390. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> 2006-09-28 21:55:39 +07:00			`/*`
			`* __div64_32 implementation for 31 bit.`
			`*`
s390/comments: unify copyright messages and remove file names Remove the file name from the comment at top of many files. In most cases the file name was wrong anyway, so it's rather pointless. Also unify the IBM copyright statement. We did have a lot of slightly different statements and wanted to change them one after another whenever a file gets touched. However that never happened. Instead people start to take the old/"wrong" statements to use as a template for new files. So unify all of them in one go. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> 2012-07-20 16:15:04 +07:00			`* Copyright IBM Corp. 2006`
[S390] __div64_32 for 31 bit. The clocksource infrastructure introduced with commit ad596171ed635c51a9eef829187af100cbf8dcf7 broke 31 bit s390. The reason is that the do_div() primitive for 31 bit always had a restriction: it could only divide an unsigned 64 bit integer by an unsigned 31 bit integer. The clocksource code now uses do_div() with a base value that has the most significant bit set. The result is that clock->cycle_interval has a funny value which causes the linux time to jump around like mad. The solution is "obvious": implement a proper __div64_32 function for 31 bit s390. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> 2006-09-28 21:55:39 +07:00			`* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),`
			`*/`

			`#include <linux/types.h>`
			`#include <linux/module.h>`

			`#ifdef CONFIG_MARCH_G5`

			`/*`
			`* Function to divide an unsigned 64 bit integer by an unsigned`
			`* 31 bit integer using signed 64/32 bit division.`
			`*/`
			`static uint32_t __div64_31(uint64_t *n, uint32_t base)`
			`{`
			`register uint32_t reg2 asm("2");`
			`register uint32_t reg3 asm("3");`
			`uint32_t *words = (uint32_t *) n;`
			`uint32_t tmp;`

			`/* Special case base==1, remainder = 0, quotient = n */`
			`if (base == 1)`
			`return 0;`
			`/*`
			`* Special case base==0 will cause a fixed point divide exception`
			`* on the dr instruction and may not happen anyway. For the`
			`* following calculation we can assume base > 1. The first`
			`* signed 64 / 32 bit division with an upper half of 0 will`
			`* give the correct upper half of the 64 bit quotient.`
			`*/`
			`reg2 = 0UL;`
			`reg3 = words[0];`
			`asm volatile(`
			`" dr %0,%2\n"`
			`: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );`
			`words[0] = reg3;`
			`reg3 = words[1];`
			`/*`
			`* To get the lower half of the 64 bit quotient and the 32 bit`
			`* remainder we have to use a little trick. Since we only have`
			`* a signed division the quotient can get too big. To avoid this`
			`* the 64 bit dividend is halved, then the signed division will`
			`* work. Afterwards the quotient and the remainder are doubled.`
			`* If the last bit of the dividend has been one the remainder`
			`* is increased by one then checked against the base. If the`
			`* remainder has overflown subtract base and increase the`
			`* quotient. Simple, no ?`
			`*/`
			`asm volatile(`
			`" nr %2,%1\n"`
			`" srdl %0,1\n"`
			`" dr %0,%3\n"`
			`" alr %0,%0\n"`
			`" alr %1,%1\n"`
			`" alr %0,%2\n"`
			`" clr %0,%3\n"`
			`" jl 0f\n"`
			`" slr %0,%3\n"`
[S390] __div64_31 broken for CONFIG_MARCH_G5 The implementation of __div64_31 for G5 machines is broken. The comments in __div64_31 are correct, only the code does not do what the comments say. The part "If the remainder has overflown subtract base and increase the quotient" is only partially realized, the base is subtracted correctly but the quotient is only increased if the dividend had the last bit set. Using the correct instruction fixes the problem. Cc: stable@kernel.org Reported-by: Frans Pop <elendil@planet.nl> Tested-by: Frans Pop <elendil@planet.nl> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> 2009-03-18 19:27:32 +07:00			`" ahi %1,1\n"`
[S390] __div64_32 for 31 bit. The clocksource infrastructure introduced with commit ad596171ed635c51a9eef829187af100cbf8dcf7 broke 31 bit s390. The reason is that the do_div() primitive for 31 bit always had a restriction: it could only divide an unsigned 64 bit integer by an unsigned 31 bit integer. The clocksource code now uses do_div() with a base value that has the most significant bit set. The result is that clock->cycle_interval has a funny value which causes the linux time to jump around like mad. The solution is "obvious": implement a proper __div64_32 function for 31 bit s390. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> 2006-09-28 21:55:39 +07:00			`"0:\n"`
			`: "+d" (reg2), "+d" (reg3), "=d" (tmp)`
			`: "d" (base), "2" (1UL) : "cc" );`
			`words[1] = reg3;`
			`return reg2;`
			`}`

			`/*`
			`* Function to divide an unsigned 64 bit integer by an unsigned`
			`* 32 bit integer using the unsigned 64/31 bit division.`
			`*/`
			`uint32_t __div64_32(uint64_t *n, uint32_t base)`
			`{`
			`uint32_t r;`

			`/*`
			`* If the most significant bit of base is set, divide n by`
			`* (base/2). That allows to use 64/31 bit division and gives a`
			`* good approximation of the result: n = (base/2)*q + r. The`
			`* result needs to be corrected with two simple transformations.`
			`* If base is already < 2^31-1 __div64_31 can be used directly.`
			`*/`
			`r = __div64_31(n, ((signed) base < 0) ? (base/2) : base);`
			`if ((signed) base < 0) {`
			`uint64_t q = *n;`
			`/*`
			`* First transformation:`
			`* n = (base/2)*q + r`
			`* = ((base/2)*2)*(q/2) + ((q&1) ? (base/2) : 0) + r`
			`* Since r < (base/2), r + (base/2) < base.`
			`* With q1 = (q/2) and r1 = r + ((q&1) ? (base/2) : 0)`
			`* n = ((base/2)*2)*q1 + r1 with r1 < base.`
			`*/`
			`if (q & 1)`
			`r += base/2;`
			`q >>= 1;`
			`/*`
			`* Second transformation. ((base/2)*2) could have lost the`
			`* last bit.`
			`* n = ((base/2)*2)*q1 + r1`
			`* = base*q1 - ((base&1) ? q1 : 0) + r1`
			`*/`
			`if (base & 1) {`
			`int64_t rx = r - q;`
			`/*`
			`* base is >= 2^31. The worst case for the while`
			`* loop is n=2^64-1 base=2^31+1. That gives a`
			`* maximum for q=(2^64-1)/2^31 = 0x1ffffffff. Since`
			`* base >= 2^31 the loop is finished after a maximum`
			`* of three iterations.`
			`*/`
			`while (rx < 0) {`
			`rx += base;`
			`q--;`
			`}`
			`r = rx;`
			`}`
			`*n = q;`
			`}`
			`return r;`
			`}`

			`#else /* MARCH_G5 */`

			`uint32_t __div64_32(uint64_t *n, uint32_t base)`
			`{`
			`register uint32_t reg2 asm("2");`
			`register uint32_t reg3 asm("3");`
			`uint32_t *words = (uint32_t *) n;`

			`reg2 = 0UL;`
			`reg3 = words[0];`
			`asm volatile(`
			`" dlr %0,%2\n"`
			`: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );`
			`words[0] = reg3;`
			`reg3 = words[1];`
			`asm volatile(`
			`" dlr %0,%2\n"`
			`: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );`
			`words[1] = reg3;`
			`return reg2;`
			`}`

			`#endif /* MARCH_G5 */`