linux_dsm_epyc7002/arch/c6x/lib/divu.S

;; SPDX-License-Identifier: GPL-2.0-or-later
;;  Copyright 2010  Free Software Foundation, Inc.
;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;

#include <linux/linkage.h>

	;; ABI considerations for the divide functions
	;; The following registers are call-used:
	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
	;;
	;; In our implementation, divu and remu are leaf functions,
	;; while both divi and remi call into divu.
	;; A0 is not clobbered by any of the functions.
	;; divu does not clobber B2 either, which is taken advantage of
	;; in remi.
	;; divi uses B5 to hold the original return address during
	;; the call to divu.
	;; remi uses B2 and A5 to hold the input values during the
	;; call to divu.  It stores B3 on the stack.

	.text
ENTRY(__c6xabi_divu)
	;; Unsigned 32-bit division.
	;;
	;; In:    A4 = dividend, B4 = divisor, B3 = return address
	;; Out:   A4 = quotient
	;; Writes A1, A2, A4, A6, B0, B1, B4 (nothing else is modified here).
	;;
	;; Register roles inside the routine:
	;;   B1  number of subc steps still to run (starts as the count of
	;;       leading zero bits in the divisor)
	;;   A6  copy of that initial count, kept for the final fix-up shifts
	;;   B4  divisor, shifted left so its top set bit is at bit 31, then
	;;       shifted right by one before the subc steps
	;;   A2  top quotient bit, parked in bit 31
	;;   B0  loop-again flag
	;;
	;; NOTE(review): the code is scheduled around branch delay slots; the
	;; position of every execute packet relative to the two branches
	;; matters, so do not reorder instructions.

	;; We use a series of up to 31 subc instructions.  First, we find
	;; out how many leading zero bits there are in the divisor.  This
	;; gives us both the shift count for aligning the divisor (shifting
	;; it left until its most significant set bit is at bit 31) and the
	;; number of times we have to execute subc.

	;; At the end, we have both the remainder and most of the quotient
	;; in A4.  The top bit of the quotient is computed first and is
	;; placed in A2.

	;; Return immediately if the dividend is zero.
	;; B1 is first loaded with the dividend so it can serve as the
	;; predicate.  Non-zero dividend: B1 = leading-zero count of B4.
	;; Zero dividend: the return branch is issued, B4 is forced to 1 and
	;; B1 stays 0, so every [B1]-predicated step that still executes
	;; before the branch takes effect is skipped and A4 is returned as 0.
	 mv	.s2x	A4, B1
   [B1]	 lmbd	.l2	1, B4, B1
|| [!B1] b	.s2	B3	; RETURN A
|| [!B1] mvk	.d2	1, B4
	;; Remember the shift count in A6 and align the divisor.
	 mv	.l1x	B1, A6
||	 shl	.s2	B4, B1, B4

	;; The loop performs a maximum of 28 steps, so we do the
	;; first 3 here.
	;; Step 1 is done by hand: A2 = (dividend < aligned divisor).
	 cmpltu	.l1x	A4, B4, A2
	;; If the divisor fits, subtract it.  The predicate reads the old A2
	;; while, in the same packet, A2 is inverted into the quotient bit
	;; and the divisor is moved down one position for the subc steps.
   [!A2] sub	.l1x	A4, B4, A4
||	 shru	.s2	B4, 1, B4
||	 xor	.s1	1, A2, A2

	;; Park the top quotient bit in bit 31 of A2, and run steps 2 and 3.
	;; Each subc is one conditional-subtract-and-shift division step;
	;; B1 counts down and, once it reaches 0, disables all later steps.
	 shl	.s1	A2, 31, A2
|| [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1

	;; RETURN A may happen here (note: must happen before the next branch)
_divu_loop:
	;; One pass = seven predicated subc steps.  B0 = (B1 > 7) is sampled
	;; from the value of B1 at the top of the pass; the backward branch
	;; is issued in the second packet and takes effect after the seventh.
	 cmpgt	.l2	B1, 7, B0
|| [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
|| [B0]  b	.s1	_divu_loop
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
	;; loop backwards branch happens here

	;; Issue the return; the fix-up below runs while it is pending.
	;; With n = initial leading-zero count (saved in A6):
	 ret	.s2	B3
||	 mvk	.s1	32, A1
	;; A6 = 32 - n
	 sub	.l1	A1, A6, A6
	;; Shift the remainder out of the top of A4, leaving the quotient
	;; bits produced by subc left-justified.
	 shl	.s1	A4, A6, A4
	;; Make room at bit 31 for the top quotient bit; A6 = 31 - n.
	 shru	.s1	A4, 1, A4
||	 sub	.l1	A6, 1, A6
	;; Merge in the top quotient bit held in A2.
	 or	.l1	A2, A4, A4
	;; Right-justify the (n + 1)-bit quotient in A4.
	 shru	.s1	A4, A6, A4
	;; NOTE(review): appears to be padding after the last packet that
	;; runs before the return takes effect -- confirm before removing.
	 nop
ENDPROC(__c6xabi_divu)
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 156 Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details you should have received a copy of the gnu general public license along with this program if not write to the free software foundation inc 59 temple place suite 330 boston ma 02111 1307 usa extracted by the scancode license scanner the SPDX license identifier GPL-2.0-or-later has been chosen to replace the boilerplate/reference in 1334 file(s). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Allison Randal <allison@lohutok.net> Reviewed-by: Richard Fontana <rfontana@redhat.com> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190527070033.113240726@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-05-27 13:55:05 +07:00			`;; SPDX-License-Identifier: GPL-2.0-or-later`
C6X: library code Original port to early 2.6 kernel using TI COFF toolchain. Brought up to date by Mark Salter <msalter@redhat.com> Signed-off-by: Aurelien Jacquiot <a-jacquiot@ti.com> Signed-off-by: Mark Salter <msalter@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> 2011-10-04 22:15:51 +07:00			`;; Copyright 2010 Free Software Foundation, Inc.`
			`;; Contributed by Bernd Schmidt <bernds@codesourcery.com>.`
			`;;`

			`#include <linux/linkage.h>`

			`;; ABI considerations for the divide functions`
			`;; The following registers are call-used:`
			`;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5`
			`;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4`
			`;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4`
			`;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4`
			`;;`
			`;; In our implementation, divu and remu are leaf functions,`
			`;; while both divi and remi call into divu.`
			`;; A0 is not clobbered by any of the functions.`
			`;; divu does not clobber B2 either, which is taken advantage of`
			`;; in remi.`
			`;; divi uses B5 to hold the original return address during`
			`;; the call to divu.`
			`;; remi uses B2 and A5 to hold the input values during the`
			`;; call to divu. It stores B3 in on the stack.`

			`.text`
			`ENTRY(__c6xabi_divu)`
			`;; We use a series of up to 31 subc instructions. First, we find`
			`;; out how many leading zero bits there are in the divisor. This`
			`;; gives us both a shift count for aligning (shifting) the divisor`
			`;; to the, and the number of times we have to execute subc.`

			`;; At the end, we have both the remainder and most of the quotient`
			`;; in A4. The top bit of the quotient is computed first and is`
			`;; placed in A2.`

			`;; Return immediately if the dividend is zero.`
			`mv .s2x A4, B1`
			`[B1] lmbd .l2 1, B4, B1`
			`\|\| [!B1] b .s2 B3 ; RETURN A`
			`\|\| [!B1] mvk .d2 1, B4`
			`mv .l1x B1, A6`
			`\|\| shl .s2 B4, B1, B4`

			`;; The loop performs a maximum of 28 steps, so we do the`
			`;; first 3 here.`
			`cmpltu .l1x A4, B4, A2`
			`[!A2] sub .l1x A4, B4, A4`
			`\|\| shru .s2 B4, 1, B4`
			`\|\| xor .s1 1, A2, A2`

			`shl .s1 A2, 31, A2`
			`\|\| [B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`

			`;; RETURN A may happen here (note: must happen before the next branch)`
			`_divu_loop:`
			`cmpgt .l2 B1, 7, B0`
			`\|\| [B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`\|\| [B0] b .s1 _divu_loop`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`[B1] subc .l1x A4,B4,A4`
			`\|\| [B1] add .s2 -1, B1, B1`
			`;; loop backwards branch happens here`

			`ret .s2 B3`
			`\|\| mvk .s1 32, A1`
			`sub .l1 A1, A6, A6`
			`shl .s1 A4, A6, A4`
			`shru .s1 A4, 1, A4`
			`\|\| sub .l1 A6, 1, A6`
			`or .l1 A2, A4, A4`
			`shru .s1 A4, A6, A4`
			`nop`
			`ENDPROC(__c6xabi_divu)`