mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-15 09:56:58 +07:00
ff5fdafc9e
The __memzero assembly code is almost identical to memset's except for two orr instructions. The runtime performance of __memzero(p, n) and memset(p, 0, n) is accordingly almost identical. However, the memset() macro used to guard against a zero length and to call __memzero at compile time when the fill value is a constant zero interferes with compiler optimizations. Arnd found that the test against a zero length brings up some new warnings with gcc v8: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82103 And successively removing the test against a zero length and the call to __memzero optimization produces the following kernel sizes for defconfig with gcc 6: text data bss dec hex filename 12248142 6278960 413588 18940690 1210312 vmlinux.orig 12244474 6278960 413588 18937022 120f4be vmlinux.no_zero_test 12239160 6278960 413588 18931708 120dffc vmlinux.no_memzero So it is probably not worth keeping __memzero around given that the compiler can do a better job at inlining trivial memset(p,0,n) on its own. And the memset code already handles a zero length just fine. Suggested-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Nicolas Pitre <nico@linaro.org> Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
247 lines
5.8 KiB
ArmAsm
247 lines
5.8 KiB
ArmAsm
/*
|
|
* linux/arch/arm/kernel/head-common.S
|
|
*
|
|
* Copyright (C) 1994-2002 Russell King
|
|
* Copyright (c) 2003 ARM Limited
|
|
* All Rights Reserved
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
*/
|
|
#include <asm/assembler.h>
|
|
|
|
/*
 * Values used by __vet_atags to recognise the boot data passed in r2.
 *
 * ATAG_CORE is the identifier word expected in the first tag of an ATAG
 * list.  The two *_SIZE values are byte counts shifted right by two,
 * i.e. lengths in 32-bit words: 2 + 3 words for a populated core tag,
 * 2 words for an empty one.
 */
#define ATAG_CORE		0x54410001
#define ATAG_CORE_SIZE		((2*4 + 3*4) >> 2)
#define ATAG_CORE_SIZE_EMPTY	((2*4) >> 2)

/*
 * Device tree magic as seen by a native 32-bit load of the first word
 * of the blob.  The magic is 0xd00dfeed in big-endian byte order, so a
 * little-endian kernel compares against the byte-swapped value.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
#define OF_DT_MAGIC		0xedfe0dd0
#else
#define OF_DT_MAGIC		0xd00dfeed
#endif
|
|
|
|
/*
|
|
* Exception handling. Something went wrong and we can't proceed. We
|
|
* ought to tell the user, but since we don't have any guarantee that
|
|
* we're even running on the right architecture, we do virtually nothing.
|
|
*
|
|
* If CONFIG_DEBUG_LL is set we try to print out something about the error
|
|
* and hope for the best (useful if bootloader fails to pass a proper
|
|
* machine ID for example).
|
|
*/
|
|
__HEAD
|
|
|
|
/*
 * __vet_atags - sanity-check the boot-data pointer handed over in r2.
 *
 * The pointer is accepted when it is word aligned and the memory it
 * refers to begins with either
 *   - the device tree magic OF_DT_MAGIC (CONFIG_OF_FLATTREE only), or
 *   - an ATAG_CORE tag: first word equal to ATAG_CORE_SIZE or
 *     ATAG_CORE_SIZE_EMPTY, and second word equal to ATAG_CORE.
 * The physical location of the pointer is not range-checked here.
 *
 * In:
 *  r2	candidate atags/dtb pointer
 * Returns:
 *  r2	unchanged if it passed the checks above, zero otherwise
 *  r5, r6	corrupted
 */
__vet_atags:
	tst	r2, #0x3			@ either low bit set -> misaligned
	bne	.Lvet_atags_bad

	ldr	r5, [r2]			@ r5 = first word at the pointer
#ifdef CONFIG_OF_FLATTREE
	ldr	r6, =OF_DT_MAGIC
	cmp	r5, r6				@ device tree blob?
	beq	.Lvet_atags_ok
#endif
	cmp	r5, #ATAG_CORE_SIZE		@ length of a populated core tag ...
	cmpne	r5, #ATAG_CORE_SIZE_EMPTY	@ ... or of an empty one?
	bne	.Lvet_atags_bad
	ldr	r6, =ATAG_CORE
	ldr	r5, [r2, #4]			@ r5 = second word (tag identifier)
	cmp	r5, r6
	bne	.Lvet_atags_bad

.Lvet_atags_ok:
	ret	lr				@ accepted: r2 left untouched

.Lvet_atags_bad:
	mov	r2, #0				@ rejected: report a null pointer
	ret	lr
ENDPROC(__vet_atags)
|
|
|
|
/*
 * __mmap_switched - finish the low-level setup and enter start_kernel.
 *
 * This code runs with the MMU enabled and refers to symbols by their
 * absolute addresses, so it is not position independent.
 *
 * On entry:
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags/dtb pointer
 *  r9  = processor ID
 *
 * r4 walks __mmap_switched_data; the register lists loaded through it
 * must stay in step with the layout of that table.  r4 and r7-r10 are
 * live across the calls made below, so the callees are relied upon to
 * preserve them.
 */
	__INIT
__mmap_switched:
	mov	r10, r0				@ keep control register value
	mov	r7, r1				@ keep machine ID
	mov	r8, r2				@ keep atags/dtb pointer

	mov	fp, #0
	adr	r4, __mmap_switched_data	@ r4 = cursor into the table

#if defined(CONFIG_XIP_DEFLATED_DATA)
	/* First table word is the stack pointer to use from here on. */
 ARM(	ldr	sp, [r4], #4			)
 THUMB(	ldr	sp, [r4]			)
 THUMB(	add	r4, #4				)
	bl	__inflate_kernel_data		@ unpack .data into RAM
	teq	r0, #0				@ non-zero result -> give up
	bne	__error
#elif defined(CONFIG_XIP_KERNEL)
	/* Table supplies r0-r2 for the copy below, then the stack pointer. */
 ARM(	ldmia	r4!, {r0, r1, r2, sp}		)
 THUMB(	ldmia	r4!, {r0, r1, r2, r3}		)
 THUMB(	mov	sp, r3				)
	sub	r2, r2, r1			@ r2 = byte count (r2 - r1)
	bl	memcpy				@ bring .data into RAM
#endif

	/* Next three words: start of .bss, end of .bss, stack pointer. */
 ARM(	ldmia	r4!, {r0, r1, sp}		)
 THUMB(	ldmia	r4!, {r0, r1, r3}		)
 THUMB(	mov	sp, r3				)
	sub	r2, r1, r0			@ r2 = size of .bss in bytes
	mov	r1, #0				@ fill value
	bl	memset				@ zero the .bss

	/* Last four words: where to record the values saved on entry. */
	ldmia	r4, {r0, r1, r2, r3}
	str	r9, [r0]			@ processor ID
	str	r7, [r1]			@ machine type
	str	r8, [r2]			@ atags pointer
	cmp	r3, #0				@ a zero word means no slot to fill
	strne	r10, [r3]			@ control register value
	mov	lr, #0				@ start_kernel is branched to, not called
	b	start_kernel
ENDPROC(__mmap_switched)
|
|
|
|
/*
 * Address table consumed, front to back, by __mmap_switched.  The
 * trailing comments name the register each word is loaded into; the
 * order here must match the loads performed there.
 */
	.p2align 2				@ 4-byte alignment
	.type	__mmap_switched_data, %object
__mmap_switched_data:
#if defined(CONFIG_XIP_KERNEL) && !defined(CONFIG_XIP_DEFLATED_DATA)
	/* operands for copying .data to RAM */
	.long	_sdata				@ r0
	.long	__data_loc			@ r1
	.long	_edata_loc			@ r2
#endif
#ifdef CONFIG_XIP_KERNEL
	.long	__bss_stop			@ sp (temporary stack in .bss)
#endif

	/* range cleared by memset, followed by the stack pointer */
	.long	__bss_start			@ r0
	.long	__bss_stop			@ r1
	.long	init_thread_union + THREAD_START_SP @ sp

	/* where the values saved on entry get stored */
	.long	processor_id			@ r0
	.long	__machine_arch_type		@ r1
	.long	__atags_pointer			@ r2
#ifndef CONFIG_CPU_CP15
	.long	0				@ r3
#else
	.long	cr_alignment			@ r3
#endif
	.size	__mmap_switched_data, . - __mmap_switched_data
|
|
|
|
/*
 * lookup_processor_type - C-callable wrapper around
 * __lookup_processor_type.
 *
 * In:   r0 = CPU ID to look up
 * Out:  r0 = whatever __lookup_processor_type leaves in r5
 *
 * The helper takes its argument in r9 and uses r4-r6, so those
 * registers are saved on entry and restored on the way out.
 */
ENTRY(lookup_processor_type)
	stmdb	sp!, {r4, r5, r6, r9, lr}
	mov	r9, r0				@ helper expects the ID in r9
	bl	__lookup_processor_type
	mov	r0, r5				@ hand the result back in r0
	ldmia	sp!, {r4, r5, r6, r9, pc}	@ restore registers and return
ENDPROC(lookup_processor_type)
|
|
|
|
__FINIT
|
|
.text
|
|
|
|
/*
 * __lookup_processor_type - find the proc_info entry matching a CPU ID.
 *
 * Walks the linker-built list of supported processors.  The list's
 * absolute addresses cannot be used directly because this may run with
 * the MMU off, i.e. outside the address space the kernel was linked
 * for.  Instead the run-time address of __lookup_processor_type_data is
 * compared with the link-time address stored in its first word, and
 * the difference is applied to both list bounds.
 *
 *  r9 = cpuid
 * Returns:
 *  r3, r4, r6 corrupted
 *  r5 = proc_info pointer in physical address space, or zero when no
 *       entry matched
 *  r9 = cpuid (preserved)
 */
__lookup_processor_type:
	adr	r3, __lookup_processor_type_data	@ r3 = where the table is now
	ldmia	r3, {r4, r5, r6}		@ r4 = where it was linked, r5/r6 = list bounds
	sub	r3, r3, r4			@ r3 = run-time minus link-time offset
	add	r6, r6, r3			@ adjust end of list
	add	r5, r5, r3			@ adjust start of list; r5 = current entry
.Llookup_proc_next:
	ldmia	r5, {r3, r4}			@ r3 = entry's value, r4 = entry's mask
	and	r4, r4, r9			@ keep only the ID bits this entry tests
	teq	r3, r4
	beq	.Llookup_proc_done		@ match: r5 already points at the entry
	add	r5, r5, #PROC_INFO_SZ		@ advance by sizeof(proc_info_list)
	cmp	r5, r6
	blo	.Llookup_proc_next		@ still below the end of the list
	mov	r5, #0				@ list exhausted: unknown processor
.Llookup_proc_done:
	ret	lr
ENDPROC(__lookup_processor_type)
|
|
|
|
/*
 * Table read by __lookup_processor_type.  The first word holds the
 * table's own link-time address; the other two bound the proc_info
 * list.  See <asm/procinfo.h> for details of the __proc_info structure.
 */
	.p2align 2				@ 4-byte alignment
	.type	__lookup_processor_type_data, %object
__lookup_processor_type_data:
	.long	.				@ r4: link-time address of this word
	.long	__proc_info_begin		@ r5: first list entry
	.long	__proc_info_end			@ r6: end of list (exclusive bound)
	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
|
|
|
|
/*
 * __error_lpae - report an LPAE kernel / non-LPAE CPU mismatch.
 *
 * With CONFIG_DEBUG_LL the message below is printed through
 * printascii first; in every configuration control then passes to
 * __error.
 */
__error_lpae:
#ifdef CONFIG_DEBUG_LL
	adr	r0, str_lpae			@ r0 = message for printascii
	bl	printascii
#endif
	b	__error
#ifdef CONFIG_DEBUG_LL
	/* message text lives after the branch, out of the execution path */
str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n"
#endif
	.align					@ realign after the string data
ENDPROC(__error_lpae)
|
|
|
|
/*
 * __error_p - report an unrecognised processor and stop.
 *
 * In:
 *  r9 = processor ID (printed in hex when CONFIG_DEBUG_LL is set)
 *
 * With CONFIG_DEBUG_LL the routine prints
 * "Error: unrecognized/unsupported processor variant (0x<id>)." using
 * printascii/printhex8.  In every configuration it then branches to
 * __error explicitly, matching __error_lpae, so that it does not rely
 * on __error being placed immediately after it.
 */
__error_p:
#ifdef CONFIG_DEBUG_LL
	adr	r0, str_p1			@ leading part of the message
	bl	printascii
	mov	r0, r9				@ processor ID as 8 hex digits
	bl	printhex8
	adr	r0, str_p2			@ closing part of the message
	bl	printascii
	b	__error
str_p1:	.asciz	"\nError: unrecognized/unsupported processor variant (0x"
str_p2:	.asciz	").\n"
#else
	b	__error				@ nothing to print: go straight to __error
#endif
	.align					@ realign after the string data / branch
ENDPROC(__error_p)
|
|
|
|
/*
 * __error - terminal error handler: spins forever and never returns.
 *
 * On RiscPC (CONFIG_ARCH_RPC) it first turns the screen red, by
 * storing 0x11111111 to the four consecutive words starting at
 * 0x02000000.
 */
__error:
#ifdef CONFIG_ARCH_RPC
	mov	r3, #0x11
	mov	r0, #0x02000000			@ r0 = first word to write
	orr	r3, r3, r3, lsl #8		@ r3 = 0x00001111
	orr	r3, r3, r3, lsl #16		@ r3 = 0x11111111
	.rept	4				@ four post-incremented word stores
	str	r3, [r0], #4
	.endr
#endif
.Lerror_spin:
	mov	r0, r0				@ no-op
	b	.Lerror_spin			@ loop here indefinitely
ENDPROC(__error)
|