mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-23 00:47:11 +07:00
caab277b1d
Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license version 2 as published by the free software foundation this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details you should have received a copy of the gnu general public license along with this program if not see http www gnu org licenses extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference in 503 file(s). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Alexios Zavras <alexios.zavras@intel.com> Reviewed-by: Allison Randal <allison@lohutok.net> Reviewed-by: Enrico Weigelt <info@metux.net> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190602204653.811534538@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
116 lines
2.9 KiB
ArmAsm
116 lines
2.9 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2013 ARM Ltd.
|
|
* Copyright (C) 2013 Linaro.
|
|
*
|
|
* This code is based on glibc cortex strings work originally authored by
|
|
* Linaro. The original code can be found @
|
|
*
|
|
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
|
* files/head:/src/aarch64/
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
/*
|
|
* calculate the length of a string
|
|
*
|
|
* Parameters:
|
|
* x0 - const string pointer
|
|
* Returns:
|
|
* x0 - the length of the string (number of bytes before the first NUL)
|
|
*/
|
|
|
|
/* Arguments and results. */
|
|
srcin .req x0 /* in: pointer to the start of the string */
|
|
len .req x0 /* out: result; shares x0 with srcin, so srcin is dead once len is written */
|
|
|
|
/* Locals and temporaries. */
|
|
src .req x1 /* read cursor: srcin rounded down to 16 bytes, advanced 16 per load */
|
|
data1 .req x2 /* first (lower-address) dword of the current 16-byte block */
|
|
data2 .req x3 /* second (higher-address) dword of the current 16-byte block */
|
|
data2a .req x4 /* data2 with the pre-string bytes forced non-zero (misaligned entry only) */
|
|
has_nul1 .req x5 /* NUL syndrome for data1: non-zero iff data1 contains a zero byte */
|
|
has_nul2 .req x6 /* NUL syndrome for data2; also holds the syndrome used for the final clz */
|
|
tmp1 .req x7 /* scratch */
|
|
tmp2 .req x8 /* scratch */
|
|
tmp3 .req x9 /* scratch */
|
|
tmp4 .req x10 /* scratch */
|
|
zeroones .req x11 /* holds REP8_01 for the whole function */
|
|
pos .req x12 /* leading-zero count of the byte-reversed syndrome (bit position of the NUL) */
|
|
|
|
/*
 * Byte-replicated 64-bit constants used by the parallel NUL test:
 * REP8_01 has 0x01 in every byte, REP8_7f has 0x7f in every byte and
 * REP8_80 has 0x80 in every byte. REP8_80 is not referenced by the code
 * visible in this file.
 */
#define REP8_01 0x0101010101010101
|
|
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
|
#define REP8_80 0x8080808080808080
|
|
|
|
WEAK(strlen)
/*
 * strlen: scan the string 16 bytes at a time from a 16-byte aligned
 * address, testing both dwords for a zero byte in parallel.
 *
 * In:  x0 (srcin) = string pointer
 * Out: x0 (len)   = number of bytes before the first NUL
 * Uses x1-x12 as scratch (see the .req aliases) and the condition flags.
 *
 * NOTE(review): WEAK/ENDPIPROC/EXPORT_SYMBOL_NOKASAN and CPU_BE/CPU_LE are
 * macros defined outside this file (presumably linux/linkage.h and
 * asm/assembler.h); from their use here CPU_BE()/CPU_LE() appear to emit
 * their argument only for big-/little-endian builds - confirm there.
 */
|
|
mov zeroones, #REP8_01 /* 0x01 in every byte, used by the NUL test below */
|
|
bic src, srcin, #15 /* src = srcin rounded down to a 16-byte boundary */
|
|
ands tmp1, srcin, #15 /* tmp1 = offset of srcin inside that block; Z set if aligned */
|
|
b.ne .Lmisaligned /* unaligned start: mask off the bytes before the string first */
|
|
/*
|
|
* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
|
* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
|
* can be done in parallel across the entire word.
|
|
*/
|
|
/*
|
|
* The inner loop deals with two Dwords at a time. This has a
|
|
* slightly higher start-up cost, but we should win quite quickly,
|
|
* especially on cores with a high number of issue slots per
|
|
* cycle, as we get much better parallelism out of the operations.
|
|
*/
|
|
.Lloop:
|
|
ldp data1, data2, [src], #16 /* load 16 bytes, then advance src by 16 (post-index) */
|
|
.Lrealigned:
|
|
sub tmp1, data1, zeroones /* X - 1 (per byte) for data1 */
|
|
orr tmp2, data1, #REP8_7f /* X | 0x7f for data1 */
|
|
sub tmp3, data2, zeroones /* X - 1 (per byte) for data2 */
|
|
orr tmp4, data2, #REP8_7f /* X | 0x7f for data2 */
|
|
bic has_nul1, tmp1, tmp2 /* syndrome for data1: (X - 1) & ~(X | 0x7f) */
|
|
bics has_nul2, tmp3, tmp4 /* syndrome for data2; sets Z iff data2 has no NUL */
|
|
ccmp has_nul1, #0, #0, eq /* if data2 clean, compare has_nul1 with 0; else NZCV = 0000 (ne) */
|
|
b.eq .Lloop /* loop only while neither dword contains a NUL */
|
|
|
|
sub len, src, srcin /* bytes from srcin to the END of the block just tested (src already advanced) */
|
|
cbz has_nul1, .Lnul_in_data2 /* data1 clean, so the first NUL is in data2 */
|
|
CPU_BE( mov data2, data1 ) /* prepare data to re-calculate the syndrome */
|
|
sub len, len, #8 /* NUL is in data1: step back over data2 as well */
|
|
mov has_nul2, has_nul1 /* the common tail below works on has_nul2 */
|
|
.Lnul_in_data2:
|
|
/*
|
|
* For big-endian, carry propagation (if the final byte in the
|
|
* string is 0x01) means we cannot use has_nul directly. The
|
|
* easiest way to get the correct byte is to byte-swap the data
|
|
* and calculate the syndrome a second time.
|
|
*/
|
|
CPU_BE( rev data2, data2 )
|
|
CPU_BE( sub tmp1, data2, zeroones )
|
|
CPU_BE( orr tmp2, data2, #REP8_7f )
|
|
CPU_BE( bic has_nul2, tmp1, tmp2 )
|
|
|
|
sub len, len, #8 /* len = offset from srcin to the start of the dword holding the NUL */
|
|
rev has_nul2, has_nul2 /* byte-reverse so the earliest string byte is most significant */
|
|
clz pos, has_nul2 /* bit index of the first NUL marker, counted from the top */
|
|
add len, len, pos, lsr #3 /* Bits to bytes. */
|
|
ret
|
|
|
|
.Lmisaligned:
/*
 * Entry for srcin not 16-byte aligned; tmp1 = srcin & 15 (1..15).
 * Load the enclosing aligned block and force the bytes that precede
 * the string to non-zero so they can never be reported as a NUL.
 */
|
|
cmp tmp1, #8 /* flags consumed by csinv/csel below: le <=> offset <= 8 */
|
|
neg tmp1, tmp1
|
|
ldp data1, data2, [src], #16 /* load the aligned block containing srcin; src += 16 */
|
|
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
|
mov tmp2, #~0 /* all-ones, shifted below into a mask covering the pre-string bytes */
|
|
/* Big-endian. Early bytes are at MSB. */
|
|
CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
|
|
/* Little-endian. Early bytes are at LSB. */
|
|
CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
|
|
|
|
orr data1, data1, tmp2 /* candidate: data1 with pre-string bytes set to 0xff */
|
|
orr data2a, data2, tmp2 /* candidate: data2 with pre-string bytes set to 0xff */
|
|
csinv data1, data1, xzr, le /* offset <= 8: keep masked data1; else data1 = ~0 (entirely before the string) */
|
|
csel data2, data2, data2a, le /* offset <= 8: data2 is all string bytes; else use the masked copy */
|
|
b .Lrealigned /* join the main loop at the NUL test, skipping its load */
|
|
ENDPIPROC(strlen)
|
|
EXPORT_SYMBOL_NOKASAN(strlen)
|