linux_dsm_epyc7002/arch/powerpc/boot/div64.S

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Divide a 64-bit unsigned number by a 32-bit unsigned number.
 * This routine assumes that the top 32 bits of the dividend are
 * non-zero to start with.
 * On entry, r3 points to the dividend, which gets overwritten with
 * the 64-bit quotient, and r4 contains the divisor.
 * On exit, r3 contains the remainder.
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include "ppc_asm.h"

/*
 * __div64_32: in-place division of an unsigned 64-bit value by an
 * unsigned 32-bit value.
 *
 * In:   r3 = address of the dividend (high word at 0(r3), low word
 *            at 4(r3))
 *       r4 = divisor
 * Out:  the two words at the original r3 hold the quotient (high word
 *       first); r3 = remainder
 * Uses: r0 and r5-r11 as scratch; cr0 and XER[CA] are also modified.
 *
 * Register roles:
 *   r5/r6  running dividend (high/low word), reduced until r5 == 0
 *   r7     high word of the quotient
 *   r8     low word of the quotient, summed from successive estimates
 *   r11    current quotient estimate
 */
	.globl __div64_32
__div64_32:
	lwz	r5,0(r3)	# get the dividend into r5/r6
	lwz	r6,4(r3)
	cmplw	r5,r4
	li	r7,0		# quotient.hi = 0
	li	r8,0		# quotient.lo = 0
	blt	1f		# dividend.hi < divisor: quotient.hi stays 0
	divwu	r7,r5,r4	# if dividend.hi >= divisor,
	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
	subf.	r5,r0,r5	# dividend.hi %= divisor
	beq	3f		# dividend.hi is now 0: finish with one 32-bit divide
1:	mr	r11,r5		# here dividend.hi != 0
	andis.	r0,r5,0xc000	# is either of the top two bits of dividend.hi set?
	bne	2f		# yes: use dividend.hi itself as the estimate
	/*
	 * Let n be the number of leading zero bits in dividend.hi (r0)
	 * and let r10 = 0xffffffff >> n.  Below, r11 becomes the 64-bit
	 * dividend shifted right by 32-n bits, and r9 becomes the divisor
	 * plus r10 shifted right by the same amount.  The addze puts the
	 * carry out of the addc into bit 0 so that the following rotate
	 * moves it up to bit n, where the lost 33rd bit of the sum belongs.
	 */
	cntlzw	r0,r5		# we are shifting the dividend right
	li	r10,-1		# to make it < 2^32, and shifting
	srw	r10,r10,r0	# the divisor right the same amount,
	addc	r9,r4,r10	# rounding up (so the estimate cannot
	andc	r11,r6,r10	# ever be too large, only too small)
	andc	r9,r9,r10
	addze	r9,r9
	or	r11,r5,r11
	rotlw	r9,r9,r0
	rotlw	r11,r11,r0
	divwu	r11,r11,r9	# then we divide the shifted quantities
2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
	subfc	r6,r10,r6	# take the product from the dividend,
	add	r8,r8,r11	# and add the estimate to the accumulated
	subfe.	r5,r9,r5	# quotient
	bne	1b		# repeat while dividend.hi is still non-zero
3:	cmplw	r6,r4
	blt	4f		# low word already < divisor: it is the remainder
	divwu	r0,r6,r4	# perform the remaining 32-bit division
	mullw	r10,r0,r4	# and get the remainder
	add	r8,r8,r0
	subf	r6,r10,r6
4:	stw	r7,0(r3)	# return the quotient in *r3
	stw	r8,4(r3)
	mr	r3,r6		# return the remainder in r3
	blr

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 hold the 64-bit value (R3 = most significant word,
 *       R4 = least significant word)
 * R5    holds the shift count
 * result is returned in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)	
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 */
/*
 * __ashrdi3: 64-bit arithmetic (sign-propagating) right shift.
 *
 * In:   r3 = most significant word (MSW), r4 = least significant
 *       word (LSW), r5 = shift count (0 to 63 inclusive)
 * Out:  r3/r4 = shifted value
 * Uses: r6, r7, r8 as scratch; XER[CA] is modified (subfic, sraw).
 *
 * The routine is branch-free.  It relies on slw/srw producing 0, and
 * sraw producing a word filled with the sign bit, whenever the low six
 * bits of the shift amount are 32 or more.
 */
	.globl __ashrdi3
__ashrdi3:
	subfic	r6,r5,32	# r6 = 32 - count
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

/*
 * __ashldi3: 64-bit left shift.
 *
 * In:   r3 = most significant word (MSW), r4 = least significant
 *       word (LSW), r5 = shift count (0 to 63 inclusive)
 * Out:  r3/r4 = shifted value
 * Uses: r6, r7 as scratch; XER[CA] is modified (subfic).
 *
 * The routine is branch-free.  It relies on slw/srw producing 0
 * whenever the low six bits of the shift amount are 32 or more.
 */
	.globl __ashldi3
__ashldi3:
	subfic	r6,r5,32	# r6 = 32 - count
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr

/*
 * __lshrdi3: 64-bit logical (zero-filling) right shift.
 *
 * In:   r3 = most significant word (MSW), r4 = least significant
 *       word (LSW), r5 = shift count (0 to 63 inclusive)
 * Out:  r3/r4 = shifted value
 * Uses: r6, r7 as scratch; XER[CA] is modified (subfic).
 *
 * The routine is branch-free.  It relies on slw/srw producing 0
 * whenever the low six bits of the shift amount are 32 or more.
 */
	.globl __lshrdi3
__lshrdi3:
	subfic	r6,r5,32	# r6 = 32 - count
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 152 Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version extracted by the scancode license scanner the SPDX license identifier GPL-2.0-or-later has been chosen to replace the boilerplate/reference in 3029 file(s). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Allison Randal <allison@lohutok.net> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190527070032.746973796@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-05-27 13:55:01 +07:00			`/* SPDX-License-Identifier: GPL-2.0-or-later */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 05:20:36 +07:00			`/*`
			`* Divide a 64-bit unsigned number by a 32-bit unsigned number.`
			`* This routine assumes that the top 32 bits of the dividend are`
			`* non-zero to start with.`
			`* On entry, r3 points to the dividend, which get overwritten with`
			`* the 64-bit quotient, and r4 contains the divisor.`
			`* On exit, r3 contains the remainder.`
			`*`
			`* Copyright (C) 2002 Paul Mackerras, IBM Corp.`
			`*/`
[PATCH] ppc64: make arch/ppc64/boot standalone Make the bootheader for ppc64 independent from kernel and libc headers. * add -nostdinc -isystem $gccincludes to not include libc headers * declare all functions in header files, also the stuff from string.S * declare some functions static * use stddef.h to get size_t (hopefully ok) * remove ppc32-types.h, only elf.h used the __NN types With further modifications by Paul Mackerras and Stephen Rothwell. Signed-off-by: Olaf Hering <olh@suse.de> Signed-off-by: Paul Mackerras <paulus@samba.org> 2005-08-08 10:24:38 +07:00			`#include "ppc_asm.h"`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 05:20:36 +07:00
			`.globl __div64_32`
			`__div64_32:`
			`lwz r5,0(r3) # get the dividend into r5/r6`
			`lwz r6,4(r3)`
			`cmplw r5,r4`
			`li r7,0`
			`li r8,0`
			`blt 1f`
			`divwu r7,r5,r4 # if dividend.hi >= divisor,`
			`mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor`
			`subf. r5,r0,r5 # dividend.hi %= divisor`
			`beq 3f`
			`1: mr r11,r5 # here dividend.hi != 0`
			`andis. r0,r5,0xc000`
			`bne 2f`
			`cntlzw r0,r5 # we are shifting the dividend right`
			`li r10,-1 # to make it < 2^32, and shifting`
			`srw r10,r10,r0 # the divisor right the same amount,`
powerpc: Fix div64 in bootloader The code is missing a fix that went into the main kernel variant (we should try to share that code again at some stage) Reported-by: Albert Cahalan <acahalan@gmail.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2010-11-18 10:39:24 +07:00			`addc r9,r4,r10 # rounding up (so the estimate cannot`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 05:20:36 +07:00			`andc r11,r6,r10 # ever be too large, only too small)`
			`andc r9,r9,r10`
powerpc: Fix div64 in bootloader The code is missing a fix that went into the main kernel variant (we should try to share that code again at some stage) Reported-by: Albert Cahalan <acahalan@gmail.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2010-11-18 10:39:24 +07:00			`addze r9,r9`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 05:20:36 +07:00			`or r11,r5,r11`
			`rotlw r9,r9,r0`
			`rotlw r11,r11,r0`
			`divwu r11,r11,r9 # then we divide the shifted quantities`
			`2: mullw r10,r11,r4 # to get an estimate of the quotient,`
			`mulhwu r9,r11,r4 # multiply the estimate by the divisor,`
			`subfc r6,r10,r6 # take the product from the divisor,`
			`add r8,r8,r11 # and add the estimate to the accumulated`
			`subfe. r5,r9,r5 # quotient`
			`bne 1b`
			`3: cmplw r6,r4`
			`blt 4f`
			`divwu r0,r6,r4 # perform the remaining 32-bit division`
			`mullw r10,r0,r4 # and get the remainder`
			`add r8,r8,r0`
			`subf r6,r10,r6`
			`4: stw r7,0(r3) # return the quotient in *r3`
			`stw r8,4(r3)`
			`mr r3,r6 # return the remainder in r3`
			`blr`
powerpc/boot: Add extended precision shifts to the boot wrapper. The upcomming currituck patches will need to do 64-bit shifts which will fail with undefined symbol without this patch. I looked at linking against libgcc but we can't guarantee that libgcc was compiled with soft-float. Also Using ../lib/div64.S or ../kernel/misc_32.S, this will break the build as the .o's need to be built with different flags for the bootwrapper vs the kernel. So for now the easyest option is to just copy code from arch/powerpc/kernel/misc_32.S I don't think this code changes too often ;P Signed-off-by: Tony Breeds <tony@bakeyournoodle.com> Signed-off-by: Josh Boyer <jwboyer@gmail.com> 2011-12-01 04:39:21 +07:00
			`/*`
			`* Extended precision shifts.`
			`*`
			`* Updated to be valid for shift counts from 0 to 63 inclusive.`
			`* -- Gabriel`
			`*`
			`* R3/R4 has 64 bit value`
			`* R5 has shift count`
			`* result in R3/R4`
			`*`
			`* ashrdi3: arithmetic right shift (sign propagation)`
			`* lshrdi3: logical right shift`
			`* ashldi3: left shift`
			`*/`
			`.globl __ashrdi3`
			`__ashrdi3:`
			`subfic r6,r5,32`
			`srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count`
			`addi r7,r5,32 # could be xori, or addi with -32`
			`slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count)`
			`rlwinm r8,r7,0,32 # t3 = (count < 32) ? 32 : 0`
			`sraw r7,r3,r7 # t2 = MSW >> (count-32)`
			`or r4,r4,r6 # LSW \|= t1`
			`slw r7,r7,r8 # t2 = (count < 32) ? 0 : t2`
			`sraw r3,r3,r5 # MSW = MSW >> count`
			`or r4,r4,r7 # LSW \|= t2`
			`blr`

			`.globl __ashldi3`
			`__ashldi3:`
			`subfic r6,r5,32`
			`slw r3,r3,r5 # MSW = count > 31 ? 0 : MSW << count`
			`addi r7,r5,32 # could be xori, or addi with -32`
			`srw r6,r4,r6 # t1 = count > 31 ? 0 : LSW >> (32-count)`
			`slw r7,r4,r7 # t2 = count < 32 ? 0 : LSW << (count-32)`
			`or r3,r3,r6 # MSW \|= t1`
			`slw r4,r4,r5 # LSW = LSW << count`
			`or r3,r3,r7 # MSW \|= t2`
			`blr`

			`.globl __lshrdi3`
			`__lshrdi3:`
			`subfic r6,r5,32`
			`srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count`
			`addi r7,r5,32 # could be xori, or addi with -32`
			`slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count)`
			`srw r7,r3,r7 # t2 = count < 32 ? 0 : MSW >> (count-32)`
			`or r4,r4,r6 # LSW \|= t1`
			`srw r3,r3,r5 # MSW = MSW >> count`
			`or r4,r4,r7 # LSW \|= t2`
			`blr`