diff options
Diffstat (limited to 'libc/arch-arm64/default/bionic/memchr.S')
-rw-r--r-- | libc/arch-arm64/default/bionic/memchr.S | 164 |
1 files changed, 0 insertions, 164 deletions
diff --git a/libc/arch-arm64/default/bionic/memchr.S b/libc/arch-arm64/default/bionic/memchr.S deleted file mode 100644 index 7fbcc8fd4..000000000 --- a/libc/arch-arm64/default/bionic/memchr.S +++ /dev/null @@ -1,164 +0,0 @@ -/* - * - Copyright (c) 2014, ARM Limited - All rights Reserved. - Copyright (c) 2014, Linaro Ltd. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the company nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * Neon Available. - */ - -#include <private/bionic_asm.h> - -/* Arguments and results. */ -#define srcin x0 -#define chrin w1 -#define cntin x2 - -#define result x0 - -#define src x3 -#define tmp x4 -#define wtmp2 w5 -#define synd x6 -#define soff x9 -#define cntrem x10 - -#define vrepchr v0 -#define vdata1 v1 -#define vdata2 v2 -#define vhas_chr1 v3 -#define vhas_chr2 v4 -#define vrepmask v5 -#define vend v6 - -/* - * Core algorithm: - * - * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits - * per byte. For each tuple, bit 0 is set if the relevant byte matched the - * requested character and bit 1 is not used (faster than using a 32bit - * syndrome). Since the bits in the syndrome reflect exactly the order in which - * things occur in the original string, counting trailing zeros allows to - * identify exactly which byte has matched. - */ - -ENTRY(memchr_default) - /* - * Magic constant 0x40100401 allows us to identify which lane matches - * the requested byte. - */ - cbz cntin, .Lzero_length - mov wtmp2, #0x0401 - movk wtmp2, #0x4010, lsl #16 - dup vrepchr.16b, chrin - /* Work with aligned 32-byte chunks */ - bic src, srcin, #31 - dup vrepmask.4s, wtmp2 - ands soff, srcin, #31 - and cntrem, cntin, #31 - b.eq .Lloop - - /* - * Input string is not 32-byte aligned. We calculate the syndrome - * value for the aligned 32 bytes block containing the first bytes - * and mask the irrelevant part. - */ - - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - sub tmp, soff, #32 - adds cntin, cntin, tmp - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b - addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ - addp vend.16b, vend.16b, vend.16b /* 128->64 */ - mov synd, vend.d[0] - /* Clear the soff*2 lower bits */ - lsl tmp, soff, #1 - lsr synd, synd, tmp - lsl synd, synd, tmp - /* The first block can also be the last */ - b.ls .Lmasklast - /* Have we found something already? */ - cbnz synd, .Ltail - -.Lloop: - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - subs cntin, cntin, #32 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - /* If we're out of data we finish regardless of the result */ - b.ls .Lend - /* Use a fast check for the termination condition */ - orr vend.16b, vhas_chr1.16b, vhas_chr2.16b - addp vend.2d, vend.2d, vend.2d - mov synd, vend.d[0] - /* We're not out of data, loop if we haven't found the character */ - cbz synd, .Lloop - -.Lend: - /* Termination condition found, let's calculate the syndrome value */ - and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b - addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ - addp vend.16b, vend.16b, vend.16b /* 128->64 */ - mov synd, vend.d[0] - /* Only do the clear for the last possible block */ - b.hi .Ltail - -.Lmasklast: - /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */ - add tmp, cntrem, soff - and tmp, tmp, #31 - sub tmp, tmp, #32 - neg tmp, tmp, lsl #1 - lsl synd, synd, tmp - lsr synd, synd, tmp - -.Ltail: - /* Count the trailing zeros using bit reversing */ - rbit synd, synd - /* Compensate the last post-increment */ - sub src, src, #32 - /* Check that we have found a character */ - cmp synd, #0 - /* And count the leading zeros */ - clz synd, synd - /* Compute the potential result */ - add result, src, synd, lsr #1 - /* Select result or NULL */ - csel result, xzr, result, eq - ret - -.Lzero_length: - mov result, xzr - ret -END(memchr_default) |