linux_dsm_epyc7002/arch/mn10300/lib/memmove.S

/* MN10300 Optimised simple memory to memory copy, with support for overlapping
 * regions
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

        .section .text
        .balign	L1_CACHE_BYTES

###############################################################################
#
# void *memmove(void *dst, const void *src, size_t n)
#
# Copy n bytes from src to dst; the regions are allowed to overlap.
#
# Inputs, as read by the code below:
#	d0	  dst
#	d1	  src
#	(20,sp)	  n, addressed after d2/d3 have been pushed
#
# Result:
#	a0	  the original dst
#
# Strategy:
#   - When dst < src the routine tail-jumps to memcpy, which is relied upon to
#     copy bottom up.
#   - Otherwise the copy runs top down, starting from the end of both buffers.
#     Should dst, src and n all be multiples of four, whole words are moved:
#     unrolled 32-byte chunks first, then an optional 16-byte chunk, then the
#     last one to three words.  Any misalignment selects a byte-at-a-time
#     loop instead.
#
# Register roles on the top-down path:
#	a0	  dst cursor, starts one past the end of dst
#	a1	  src cursor, starts one past the end of src
#	d2	  byte count still to copy (biased inside the loops)
#	d3	  alignment scratch, then the loop step
#	e3	  saved dst for the return value
#	d0,d1,e0,e1  data temporaries
#
###############################################################################
	.globl	memmove
        .type	memmove,@function
memmove:
	# fall back to memcpy if dst < src to work bottom up
	# (nothing has been pushed yet, so the stack is still exactly as the
	# caller set it up when the jump to memcpy is taken)
	cmp	d1,d0
	bcs	memmove_memcpy

	# work top down
	# save d2/d3; both exit paths below give them back via ret [d2,d3],8
	movm	[d2,d3],(sp)
	# store dst and src into the stack slots that sit just below the count.
	# NOTE(review): these look like the argument save slots of the caller;
	# nothing in this routine reads them back - confirm against the ABI
	mov	d0,(12,sp)
	mov	d1,(16,sp)
	mov	(20,sp),d2			# count
	add	d0,d2,a0			# dst end
	add	d1,d2,a1			# src end
	mov	d0,e3				# the return value

	cmp	+0,d2
	beq	memmove_done			# return if zero-length copy

	# see if the three parameters are all four-byte aligned
	# (OR dst, src and count together, then test the bottom two bits)
	or	d0,d1,d3
	or	d2,d3
	and	+3,d3
	bne	memmove_1			# jump if not

	# we want to transfer as much as we can in chunks of 32 bytes
	# a1 is stepped back onto the last source word; a0 stays one past the
	# end of dst because every store below is preceded by a decrement
	add	-4,a1
	cmp	+31,d2
	bls	memmove_4_remainder		# 4-byte aligned remainder

	# bias the count down by one chunk so that the borrow out of the
	# subtract ending each pass signals that no further full chunk remains
	add	-32,d2
	mov	+32,d3

memmove_4_loop:
	# one pass moves eight words, alternating d0 and d1 as the temporary:
	# load from (a1), step both cursors back one word, store to (a0).
	# NOTE(review): sub_sub is taken to subtract 4 from a1 and from a0 in
	# a single instruction - confirm against the AM33 instruction manual
	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
	mov	(a1),d1
	sub_sub	+4,a1,+4,a0
	mov	d1,(a0)

	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
	mov	(a1),d1
	sub_sub	+4,a1,+4,a0
	mov	d1,(a0)

	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
	mov	(a1),d1
	sub_sub	+4,a1,+4,a0
	mov	d1,(a0)

	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
	mov	(a1),d1
	sub_sub	+4,a1,+4,a0
	mov	d1,(a0)

	# take 32 off the biased count; no borrow means another full chunk
	sub	d3,d2
	bcc	memmove_4_loop

	# undo the bias to recover the true remainder; zero means all done
	add	d3,d2
	beq	memmove_4_no_remainder

memmove_4_remainder:
	# cut 4-7 words down to 0-3
	# (d2 is a non-zero multiple of four below 32 at this point; with
	# fewer than 16 bytes left the four-word block is skipped)
	cmp	+16,d2
	bcs	memmove_4_three_or_fewer_words
	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
	mov	(a1),d1
	sub_sub	+4,a1,+4,a0
	mov	d1,(a0)
	mov	(a1),e0
	sub_sub	+4,a1,+4,a0
	mov	e0,(a0)
	mov	(a1),e1
	sub_sub	+4,a1,+4,a0
	mov	e1,(a0)
	add	-16,d2
	beq	memmove_4_no_remainder

	# copy the remaining 1, 2 or 3 words
	# (d2 is 4, 8 or 12 here: below 8 enters at the one-word label, exactly
	# 8 at the two-word label, otherwise all three copies run by falling
	# through each label in turn)
memmove_4_three_or_fewer_words:
	cmp	+8,d2
	bcs	memmove_4_one_word
	beq	memmove_4_two_words
	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
memmove_4_two_words:
	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)
memmove_4_one_word:
	mov	(a1),d0
	sub_sub	+4,a1,+4,a0
	mov	d0,(a0)

memmove_4_no_remainder:
	# check we copied the correct amount
	# TODO: REMOVE CHECK
	# (a0 should have walked back down to the saved dst held in e3; d2
	# receives the difference between the two, and anything other than
	# zero runs into the break instructions below)
	sub	e3,a0,d2
	beq	memmove_done
	break
	break
	break

memmove_done:
	# hand back the original dst in a0, restore d2/d3 and return
	mov	e3,a0
	ret	[d2,d3],8

	# handle misaligned copying
	# byte-at-a-time loop: a1 is stepped back onto the last source byte and
	# d2 becomes n-1, so the subtract in the loop body first borrows on the
	# pass that copies the final byte
memmove_1:
	add	-1,a1
	add	-1,d2
	mov	+1,d3
	setlb					# setlb requires the next insns
						# to occupy exactly 4 bytes

	# loop body, repeated by lcc.  The subtract comes first although its
	# carry is only consumed by lcc at the bottom.
	# NOTE(review): this relies on movbu and sub_sub leaving the carry flag
	# alone, and on lcc branching back to the point marked by setlb while
	# carry is clear - confirm against the AM33 instruction manual
	sub	d3,d2
	movbu	(a1),d0
	sub_sub	d3,a1,d3,a0
	movbu	d0,(a0)
	lcc

	# same exit sequence as memmove_done
	mov	e3,a0
	ret	[d2,d3],8

	# reached only from the dst < src test at the top, before any register
	# was pushed or modified, so memcpy receives the arguments untouched
memmove_memcpy:
	jmp	memcpy

	# end marker used to compute the symbol size
memmove_end:
	.size	memmove, memmove_end-memmove
mn10300: add the MN10300/AM33 architecture to the kernel Add architecture support for the MN10300/AM33 CPUs produced by MEI to the kernel. This patch also adds board support for the ASB2303 with the ASB2308 daughter board, and the ASB2305. The only processor supported is the MN103E010, which is an AM33v2 core plus on-chip devices. [akpm@linux-foundation.org: nuke cvs control strings] Signed-off-by: Masakazu Urade <urade.masakazu@jp.panasonic.com> Signed-off-by: Koichi Yasutake <yasutake.koichi@jp.panasonic.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2008-02-08 19:19:31 +07:00			`/* MN10300 Optimised simple memory to memory copy, with support for overlapping`
			`* regions`
			`*`
			`* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.`
			`* Written by David Howells (dhowells@redhat.com)`
			`*`
			`* This program is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU General Public Licence`
			`* as published by the Free Software Foundation; either version`
			`* 2 of the Licence, or (at your option) any later version.`
			`*/`
			`#include <asm/cache.h>`

			`.section .text`
			`.balign L1_CACHE_BYTES`

			`###############################################################################`
			`#`
			`# void *memmove(void *dst, const void *src, size_t n)`
			`#`
			`###############################################################################`
			`.globl memmove`
			`.type memmove,@function`
			`memmove:`
			`# fall back to memcpy if dst < src to work bottom up`
			`cmp d1,d0`
			`bcs memmove_memcpy`

			`# work top down`
			`movm [d2,d3],(sp)`
			`mov d0,(12,sp)`
			`mov d1,(16,sp)`
			`mov (20,sp),d2 # count`
			`add d0,d2,a0 # dst end`
			`add d1,d2,a1 # src end`
			`mov d0,e3 # the return value`

			`cmp +0,d2`
			`beq memmove_done # return if zero-length copy`

			`# see if the three parameters are all four-byte aligned`
			`or d0,d1,d3`
			`or d2,d3`
			`and +3,d3`
			`bne memmove_1 # jump if not`

			`# we want to transfer as much as we can in chunks of 32 bytes`
			`add -4,a1`
			`cmp +31,d2`
			`bls memmove_4_remainder # 4-byte aligned remainder`

			`add -32,d2`
			`mov +32,d3`

			`memmove_4_loop:`
			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`mov (a1),d1`
			`sub_sub +4,a1,+4,a0`
			`mov d1,(a0)`

			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`mov (a1),d1`
			`sub_sub +4,a1,+4,a0`
			`mov d1,(a0)`

			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`mov (a1),d1`
			`sub_sub +4,a1,+4,a0`
			`mov d1,(a0)`

			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`mov (a1),d1`
			`sub_sub +4,a1,+4,a0`
			`mov d1,(a0)`

			`sub d3,d2`
			`bcc memmove_4_loop`

			`add d3,d2`
			`beq memmove_4_no_remainder`

			`memmove_4_remainder:`
			`# cut 4-7 words down to 0-3`
			`cmp +16,d2`
			`bcs memmove_4_three_or_fewer_words`
			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`mov (a1),d1`
			`sub_sub +4,a1,+4,a0`
			`mov d1,(a0)`
			`mov (a1),e0`
			`sub_sub +4,a1,+4,a0`
			`mov e0,(a0)`
			`mov (a1),e1`
			`sub_sub +4,a1,+4,a0`
			`mov e1,(a0)`
			`add -16,d2`
			`beq memmove_4_no_remainder`

			`# copy the remaining 1, 2 or 3 words`
			`memmove_4_three_or_fewer_words:`
			`cmp +8,d2`
			`bcs memmove_4_one_word`
			`beq memmove_4_two_words`
			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`memmove_4_two_words:`
			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`
			`memmove_4_one_word:`
			`mov (a1),d0`
			`sub_sub +4,a1,+4,a0`
			`mov d0,(a0)`

			`memmove_4_no_remainder:`
			`# check we copied the correct amount`
			`# TODO: REMOVE CHECK`
			`sub e3,a0,d2`
			`beq memmove_done`
			`break`
			`break`
			`break`

			`memmove_done:`
			`mov e3,a0`
			`ret [d2,d3],8`

			`# handle misaligned copying`
			`memmove_1:`
			`add -1,a1`
			`add -1,d2`
			`mov +1,d3`
			`setlb # setlb requires the next insns`
			`# to occupy exactly 4 bytes`

			`sub d3,d2`
			`movbu (a1),d0`
			`sub_sub d3,a1,d3,a0`
			`movbu d0,(a0)`
			`lcc`

			`mov e3,a0`
			`ret [d2,d3],8`

			`memmove_memcpy:`
			`jmp memcpy`

			`memmove_end:`
			`.size memmove, memmove_end-memmove`