This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] faster strcat
- From: Ondřej Bílka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Date: Mon, 8 Oct 2012 13:15:14 +0200
- Subject: [PATCH] faster strcat
This is the next version of my patch:
http://sourceware.org/ml/libc-alpha/2012-06/msg00489.html
I investigated strcat a bit further, and the speed degradation
was caused by improper usage of indirect functions.
The strcat ifunc first tests bit_Fast_Unaligned_Load, which is
false on Core 2 and AMD processors. Then it checks for SSSE3 and
calls the SSSE3 version.
But strcat_ssse3 inlines strlen_sse2_no_bsf, which on Core 2 and Phenom II
is the slowest strlen variant unless the string is longer than 2000 bytes,
where strlen_sse2 takes the lead.
Then I deleted the strcat variants that are no longer needed.
The files ports/sysdeps/ia64/strcat.c and sysdeps/powerpc/strcat.c became
duplicates of string/strcat.c.
* string/strcat.c: Reduce algorithm selection
to strlen,strcpy
* string/strncat.c: Likewise
* sysdeps/powerpc/strcat.c: Duplicated string/strcat.c
* ports/sysdeps/ia64/strcat.c: Likewise
* sysdeps/i386/i686/multiarch/Makefile: Updated
* sysdeps/x86_64/multiarch/Makefile: Updated
* sysdeps/i386/i486/strcat.S: No longer needed
* sysdeps/i386/i686/multiarch/strcat-sse2.S:Likewise
* sysdeps/i386/i686/multiarch/strcat-ssse3.S:Likewise
* sysdeps/i386/i686/multiarch/strcat.S:Likewise
* sysdeps/i386/i686/multiarch/strncat-c.c:Likewise
* sysdeps/i386/i686/multiarch/strncat-sse2.S:Likewise
* sysdeps/i386/i686/multiarch/strncat-ssse3.S:Likewise
* sysdeps/i386/i686/multiarch/strncat.S:Likewise
* sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S:Likewise
* sysdeps/x86_64/multiarch/strcat-ssse3.S:Likewise
* sysdeps/x86_64/multiarch/strcat.S:Likewise
* sysdeps/x86_64/multiarch/strncat-c.c:Likewise
* sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S:Likewise
* sysdeps/x86_64/multiarch/strncat-ssse3.S:Likewise
* sysdeps/x86_64/multiarch/strncat.S:Likewise
* sysdeps/x86_64/strcat.S:Likewise
---
ports/sysdeps/ia64/strcat.c | 26 -
string/strcat.c | 29 +-
string/strncat.c | 62 +-
sysdeps/i386/i486/strcat.S | 273 -----
sysdeps/i386/i686/multiarch/Makefile | 3 +-
sysdeps/i386/i686/multiarch/strcat-sse2.S | 1243 ---------------------
sysdeps/i386/i686/multiarch/strcat-ssse3.S | 572 ----------
sysdeps/i386/i686/multiarch/strcat.S | 119 --
sysdeps/i386/i686/multiarch/strncat-c.c | 8 -
sysdeps/i386/i686/multiarch/strncat-sse2.S | 4 -
sysdeps/i386/i686/multiarch/strncat-ssse3.S | 4 -
sysdeps/i386/i686/multiarch/strncat.S | 3 -
sysdeps/powerpc/strcat.c | 30 -
sysdeps/x86_64/multiarch/Makefile | 5 +-
sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S | 53 -
sysdeps/x86_64/multiarch/strcat-ssse3.S | 557 ---------
sysdeps/x86_64/multiarch/strcat.S | 84 --
sysdeps/x86_64/multiarch/strncat-c.c | 8 -
sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S | 3 -
sysdeps/x86_64/multiarch/strncat-ssse3.S | 3 -
sysdeps/x86_64/multiarch/strncat.S | 3 -
sysdeps/x86_64/strcat.S | 259 -----
24 files changed, 15 insertions(+), 4280 deletions(-)
delete mode 100644 ports/sysdeps/ia64/strcat.c
delete mode 100644 sysdeps/i386/i486/strcat.S
delete mode 100644 sysdeps/i386/i686/multiarch/strcat-sse2.S
delete mode 100644 sysdeps/i386/i686/multiarch/strcat-ssse3.S
delete mode 100644 sysdeps/i386/i686/multiarch/strcat.S
delete mode 100644 sysdeps/i386/i686/multiarch/strncat-c.c
delete mode 100644 sysdeps/i386/i686/multiarch/strncat-sse2.S
delete mode 100644 sysdeps/i386/i686/multiarch/strncat-ssse3.S
delete mode 100644 sysdeps/i386/i686/multiarch/strncat.S
delete mode 100644 sysdeps/powerpc/strcat.c
delete mode 100644 sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
delete mode 100644 sysdeps/x86_64/multiarch/strcat-ssse3.S
delete mode 100644 sysdeps/x86_64/multiarch/strcat.S
delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
delete mode 100644 sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
delete mode 100644 sysdeps/x86_64/multiarch/strncat-ssse3.S
delete mode 100644 sysdeps/x86_64/multiarch/strncat.S
delete mode 100644 sysdeps/x86_64/strcat.S
diff --git a/ports/sysdeps/ia64/strcat.c b/ports/sysdeps/ia64/strcat.c
deleted file mode 100644
index 53cd4d1..0000000
--- a/ports/sysdeps/ia64/strcat.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (C) 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <string.h>
-
-char *
-strcat (char *dest, const char *src)
-{
- strcpy (dest + strlen (dest), src);
- return dest;
-}
-libc_hidden_builtin_def (strcat)
diff --git a/string/strcat.c b/string/strcat.c
index f9e4bc6..28575d0 100644
--- a/string/strcat.c
+++ b/string/strcat.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+/* strcat version that uses fast strcpy/strlen.
+ Copyright (C) 1997, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,36 +17,14 @@
<http://www.gnu.org/licenses/>. */
#include <string.h>
-#include <memcopy.h>
#undef strcat
/* Append SRC on the end of DEST. */
char *
-strcat (dest, src)
- char *dest;
- const char *src;
+strcat (char *dest, const char *src)
{
- char *s1 = dest;
- const char *s2 = src;
- char c;
-
- /* Find the end of the string. */
- do
- c = *s1++;
- while (c != '\0');
-
- /* Make S1 point before the next character, so we can increment
- it while memory is read (wins on pipelined cpus). */
- s1 -= 2;
-
- do
- {
- c = *s2++;
- *++s1 = c;
- }
- while (c != '\0');
-
+ strcpy (dest + strlen (dest), src);
return dest;
}
libc_hidden_builtin_def (strcat)
diff --git a/string/strncat.c b/string/strncat.c
index dcfb04d..17b4c9a 100644
--- a/string/strncat.c
+++ b/string/strncat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1997,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,66 +17,20 @@
#include <string.h>
-#ifdef _LIBC
-# include <memcopy.h>
-#endif
-
#ifndef STRNCAT
# undef strncat
# define STRNCAT strncat
#endif
char *
-STRNCAT (char *s1, const char *s2, size_t n)
+STRNCAT (char *dest, const char *src, size_t n)
{
- char c;
- char *s = s1;
-
- /* Find the end of S1. */
- do
- c = *s1++;
- while (c != '\0');
-
- /* Make S1 point before next character, so we can increment
- it while memory is read (wins on pipelined cpus). */
- s1 -= 2;
-
- if (n >= 4)
- {
- size_t n4 = n >> 2;
- do
- {
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- return s;
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- return s;
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- return s;
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- return s;
- } while (--n4 > 0);
- n &= 3;
- }
-
- while (n > 0)
- {
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- return s;
- n--;
- }
+ size_t dest_len = strlen (dest);
+ size_t src_len = strnlen (src , n);
- if (c != '\0')
- *++s1 = '\0';
+ if (src_len == n)
+ {
+ memcpy (dest + dest_len, src, n);
+ dest[dest_len + n] = '\0';
+ }
+ else
+ strcpy (dest + dest_len, src);
- return s;
+ return dest;
}
diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
deleted file mode 100644
index 7596a0d..0000000
--- a/sysdeps/i386/i486/strcat.S
+++ /dev/null
@@ -1,273 +0,0 @@
-/* strcat(dest, src) -- Append SRC on the end of DEST.
- For Intel 80x86, x>=4.
- Copyright (C) 1994-1997,2000,2003,2005 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>.
- Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-#define PARMS LINKAGE+4 /* space for 1 saved reg */
-#define RTN PARMS
-#define DEST RTN+RTN_SIZE
-#define SRC DEST+PTR_SIZE
-
- .text
-ENTRY (BP_SYM (strcat))
- ENTER
-
- pushl %edi /* Save callee-safe register. */
- cfi_adjust_cfa_offset (4)
-
- movl DEST(%esp), %edx
- movl SRC(%esp), %ecx
- CHECK_BOUNDS_LOW (%edx, DEST(%esp))
- CHECK_BOUNDS_LOW (%ecx, SRC(%esp))
-
- testb $0xff, (%ecx) /* Is source string empty? */
- jz L(8) /* yes => return */
-
- /* Test the first bytes separately until destination is aligned. */
- testl $3, %edx /* destination pointer aligned? */
- jz L(1) /* yes => begin scan loop */
- testb $0xff, (%edx) /* is end of string? */
- jz L(2) /* yes => start appending */
- incl %edx /* increment source pointer */
-
- testl $3, %edx /* destination pointer aligned? */
- jz L(1) /* yes => begin scan loop */
- testb $0xff, (%edx) /* is end of string? */
- jz L(2) /* yes => start appending */
- incl %edx /* increment source pointer */
-
- testl $3, %edx /* destination pointer aligned? */
- jz L(1) /* yes => begin scan loop */
- testb $0xff, (%edx) /* is end of string? */
- jz L(2) /* yes => start appending */
- incl %edx /* increment source pointer */
-
- /* Now we are aligned. Begin scan loop. */
- jmp L(1)
-
- cfi_rel_offset (edi, 0)
- ALIGN(4)
-
-L(4): addl $16,%edx /* increment destination pointer for round */
-
-L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
- movl $0xfefefeff, %edi /* magic value */
-
- /* If you compare this with the algorithm in memchr.S you will
- notice that here is an `xorl' statement missing. But you must
- not forget that we are looking for C == 0 and `xorl $0, %eax'
- is a no-op. */
-
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
-
- /* According to the algorithm we had to reverse the effect of the
- XOR first and then test the overflow bits. But because the
- following XOR would destroy the carry flag and it would (in a
- representation with more than 32 bits) not alter then last
- overflow, we can now test this condition. If no carry is signaled
- no overflow must have occurred in the last byte => it was 0. */
- jnc L(3)
-
- /* We are only interested in carry bits that change due to the
- previous add, so remove original bits */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
-
- /* Now test for the other three overflow bits. */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- /* If at least one byte of the word is C we don't get 0 in %ecx. */
- jnz L(3)
-
- movl 4(%edx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(5) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz L(5) /* one byte is NUL => stop copying */
-
- movl 8(%edx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(6) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz L(6) /* one byte is NUL => stop copying */
-
- movl 12(%edx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(7) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jz L(4) /* no byte is NUL => carry on copying */
-
-L(7): addl $4, %edx /* adjust source pointer */
-L(6): addl $4, %edx
-L(5): addl $4, %edx
-
-L(3): testb %al, %al /* is first byte NUL? */
- jz L(2) /* yes => start copying */
- incl %edx /* increment source pointer */
-
- testb %ah, %ah /* is second byte NUL? */
- jz L(2) /* yes => start copying */
- incl %edx /* increment source pointer */
-
- testl $0xff0000, %eax /* is third byte NUL? */
- jz L(2) /* yes => start copying */
- incl %edx /* increment source pointer */
-
-L(2): subl %ecx, %edx /* reduce number of loop variants */
-
- /* Now we have to align the source pointer. */
- testl $3, %ecx /* pointer correctly aligned? */
- jz L(29) /* yes => start copy loop */
- movb (%ecx), %al /* get first byte */
- movb %al, (%ecx,%edx) /* and store it */
- andb %al, %al /* is byte NUL? */
- jz L(8) /* yes => return */
- incl %ecx /* increment pointer */
-
- testl $3, %ecx /* pointer correctly aligned? */
- jz L(29) /* yes => start copy loop */
- movb (%ecx), %al /* get first byte */
- movb %al, (%ecx,%edx) /* and store it */
- andb %al, %al /* is byte NUL? */
- jz L(8) /* yes => return */
- incl %ecx /* increment pointer */
-
- testl $3, %ecx /* pointer correctly aligned? */
- jz L(29) /* yes => start copy loop */
- movb (%ecx), %al /* get first byte */
- movb %al, (%ecx,%edx) /* and store it */
- andb %al, %al /* is byte NUL? */
- jz L(8) /* yes => return */
- incl %ecx /* increment pointer */
-
- /* Now we are aligned. */
- jmp L(29) /* start copy loop */
-
- ALIGN(4)
-
-L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */
- addl $16, %ecx /* adjust pointer for full round */
-
-L(29): movl (%ecx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(9) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz L(9) /* one byte is NUL => stop copying */
- movl %eax, (%ecx,%edx) /* store word to destination */
-
- movl 4(%ecx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(91) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz L(91) /* one byte is NUL => stop copying */
- movl %eax, 4(%ecx,%edx) /* store word to destination */
-
- movl 8(%ecx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(92) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz L(92) /* one byte is NUL => stop copying */
- movl %eax, 8(%ecx,%edx) /* store word to destination */
-
- movl 12(%ecx), %eax /* get word from source */
- movl $0xfefefeff, %edi /* magic value */
- addl %eax, %edi /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc L(93) /* highest byte is C => stop copying */
- xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */
- orl $0xfefefeff, %edi /* set all non-carry bits */
- incl %edi /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jz L(28) /* no is NUL => carry on copying */
-
-L(93): addl $4, %ecx /* adjust pointer */
-L(92): addl $4, %ecx
-L(91): addl $4, %ecx
-
-L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
- orb %al, %al /* is it NUL? */
- jz L(8) /* yes => return */
-
- movb %ah, 1(%ecx,%edx) /* store second byte of last word */
- orb %ah, %ah /* is it NUL? */
- jz L(8) /* yes => return */
-
- shrl $16, %eax /* make upper bytes accessible */
- movb %al, 2(%ecx,%edx) /* store third byte of last word */
- orb %al, %al /* is it NUL? */
- jz L(8) /* yes => return */
-
- movb %ah, 3(%ecx,%edx) /* store fourth byte of last word */
-
-L(8): /* GKM FIXME: check high bounds */
- movl DEST(%esp), %eax /* start address of destination is result */
- RETURN_BOUNDED_POINTER (DEST(%esp))
- popl %edi /* restore saved register */
- cfi_adjust_cfa_offset (-4)
- cfi_restore (edi)
-
- LEAVE
- RET_PTR
-END (BP_SYM (strcat))
-libc_hidden_builtin_def (strcat)
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 8946bfa..92a2b8f 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -14,8 +14,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
- strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
- strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
+ strncpy-sse2 stpcpy-sse2 stpncpy-sse2 \
strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
memchr-sse2 memchr-sse2-bsf \
memrchr-sse2 memrchr-sse2-bsf memrchr-c \
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
deleted file mode 100644
index e75f92c..0000000
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ /dev/null
@@ -1,1243 +0,0 @@
-/* strcat with SSE2
- Copyright (C) 2011-2012 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifdef SHARED
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into ECX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register contains the
- index into the jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into ECX. */ \
- SETUP_PIC_REG(cx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ecx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ecx,INDEX,SCALE), %ecx; \
- /* We loaded the jump table and adjuested ECX. Go. */ \
- jmp *%ecx
-# else
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. INDEX is a register contains the index into the
- jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-# endif
-
-# ifndef STRCAT
-# define STRCAT __strcat_sse2
-# endif
-
-# define PARMS 4
-# define STR1 PARMS+4
-# define STR2 STR1+4
-
-# ifdef USE_AS_STRNCAT
-# define LEN STR2+8
-# define STR3 STR1+4
-# else
-# define STR3 STR1
-# endif
-
-# define USE_AS_STRCAT
-# ifdef USE_AS_STRNCAT
-# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
-# else
-# define RETURN POP(%esi); ret; CFI_PUSH(%esi);
-# endif
-
-.text
-ENTRY (STRCAT)
- PUSH (%esi)
- mov STR1(%esp), %eax
- mov STR2(%esp), %esi
-# ifdef USE_AS_STRNCAT
- PUSH (%ebx)
- movl LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(ExitZero)
-# endif
- cmpb $0, (%esi)
- mov %esi, %ecx
- mov %eax, %edx
- jz L(ExitZero)
-
- and $63, %ecx
- and $63, %edx
- cmp $32, %ecx
- ja L(StrlenCore7_1)
- cmp $48, %edx
- ja L(alignment_prolog)
-
- pxor %xmm0, %xmm0
- pxor %xmm4, %xmm4
- pxor %xmm7, %xmm7
- movdqu (%eax), %xmm1
- movdqu (%esi), %xmm5
- pcmpeqb %xmm1, %xmm0
- movdqu 16(%esi), %xmm6
- pmovmskb %xmm0, %ecx
- pcmpeqb %xmm5, %xmm4
- pcmpeqb %xmm6, %xmm7
- test %ecx, %ecx
- jnz L(exit_less16_)
- mov %eax, %ecx
- and $-16, %eax
- jmp L(loop_prolog)
-
-L(alignment_prolog):
- pxor %xmm0, %xmm0
- pxor %xmm4, %xmm4
- mov %edx, %ecx
- pxor %xmm7, %xmm7
- and $15, %ecx
- and $-16, %eax
- pcmpeqb (%eax), %xmm0
- movdqu (%esi), %xmm5
- movdqu 16(%esi), %xmm6
- pmovmskb %xmm0, %edx
- pcmpeqb %xmm5, %xmm4
- shr %cl, %edx
- pcmpeqb %xmm6, %xmm7
- test %edx, %edx
- jnz L(exit_less16)
- add %eax, %ecx
-
- pxor %xmm0, %xmm0
-L(loop_prolog):
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
- .p2align 4
-L(align16_loop):
- pcmpeqb 16(%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit16)
-
- pcmpeqb 32(%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- jnz L(exit32)
-
- pcmpeqb 48(%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- jnz L(exit48)
-
- pcmpeqb 64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- lea 64(%eax), %eax
- test %edx, %edx
- jz L(align16_loop)
- bsf %edx, %edx
- add %edx, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit16):
- bsf %edx, %edx
- lea 16(%eax, %edx), %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit32):
- bsf %edx, %edx
- lea 32(%eax, %edx), %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit48):
- bsf %edx, %edx
- lea 48(%eax, %edx), %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_less16):
- bsf %edx, %edx
- add %ecx, %eax
- add %edx, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_less16_):
- bsf %ecx, %ecx
- add %ecx, %eax
-
- .p2align 4
-L(StartStrcpyPart):
- pmovmskb %xmm4, %edx
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1)
-
- movdqu %xmm5, (%eax)
- pmovmskb %xmm7, %edx
-# ifdef USE_AS_STRNCAT
- cmp $32, %ebx
- jbe L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes1)
-
- mov %esi, %ecx
- and $-16, %esi
- and $15, %ecx
- pxor %xmm0, %xmm0
-# ifdef USE_AS_STRNCAT
- add %ecx, %ebx
-# endif
- sub %ecx, %eax
- jmp L(Unalign16Both)
-
-L(StrlenCore7_1):
- mov %eax, %ecx
- pxor %xmm0, %xmm0
- and $15, %ecx
- and $-16, %eax
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- shr %cl, %edx
- test %edx, %edx
- jnz L(exit_less16_1)
- add %eax, %ecx
-
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
-
- .p2align 4
-L(align16_loop_1):
- pcmpeqb 16(%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit16_1)
-
- pcmpeqb 32(%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- jnz L(exit32_1)
-
- pcmpeqb 48(%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- jnz L(exit48_1)
-
- pcmpeqb 64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- lea 64(%eax), %eax
- test %edx, %edx
- jz L(align16_loop_1)
- bsf %edx, %edx
- add %edx, %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit16_1):
- bsf %edx, %edx
- lea 16(%eax, %edx), %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit32_1):
- bsf %edx, %edx
- lea 32(%eax, %edx), %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit48_1):
- bsf %edx, %edx
- lea 48(%eax, %edx), %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit_less16_1):
- bsf %edx, %edx
- add %ecx, %eax
- add %edx, %eax
-
- .p2align 4
-L(StartStrcpyPart_1):
- mov %esi, %ecx
- and $15, %ecx
- and $-16, %esi
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
-
-# ifdef USE_AS_STRNCAT
- cmp $48, %ebx
- ja L(BigN)
-# endif
- pcmpeqb (%esi), %xmm1
-# ifdef USE_AS_STRNCAT
- add %ecx, %ebx
-# endif
- pmovmskb %xmm1, %edx
- shr %cl, %edx
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail)
-
- pcmpeqb 16(%esi), %xmm0
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STRNCAT
- cmp $32, %ebx
- jbe L(CopyFrom1To32BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes)
-
- movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
- movdqu %xmm1, (%eax)
- sub %ecx, %eax
-
- .p2align 4
-L(Unalign16Both):
- mov $16, %ecx
- movdqa (%esi, %ecx), %xmm1
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%eax, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $48, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-L(Unalign16BothBigN):
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%eax, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm4
- movdqu %xmm3, (%eax, %ecx)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm1
- movdqu %xmm4, (%eax, %ecx)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%eax, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%eax, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movdqu %xmm3, (%eax, %ecx)
- mov %esi, %edx
- lea 16(%esi, %ecx), %esi
- and $-0x40, %esi
- sub %esi, %edx
- sub %edx, %eax
-# ifdef USE_AS_STRNCAT
- lea 128(%ebx, %edx), %ebx
-# endif
- movaps (%esi), %xmm2
- movaps %xmm2, %xmm4
- movaps 16(%esi), %xmm5
- movaps 32(%esi), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%esi), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(Unaligned64Leave)
-
- .p2align 4
-L(Unaligned64Loop_start):
- add $64, %eax
- add $64, %esi
- movdqu %xmm4, -64(%eax)
- movaps (%esi), %xmm2
- movdqa %xmm2, %xmm4
- movdqu %xmm5, -48(%eax)
- movaps 16(%esi), %xmm5
- pminub %xmm5, %xmm2
- movaps 32(%esi), %xmm3
- movdqu %xmm6, -32(%eax)
- movaps %xmm3, %xmm6
- movdqu %xmm7, -16(%eax)
- movaps 48(%esi), %xmm7
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3)
-# endif
- test %edx, %edx
- jz L(Unaligned64Loop_start)
-
-L(Unaligned64Leave):
- pxor %xmm1, %xmm1
-
- pcmpeqb %xmm4, %xmm0
- pcmpeqb %xmm5, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_0)
- test %ecx, %ecx
- jnz L(CopyFrom1To16BytesUnaligned_16)
-
- pcmpeqb %xmm6, %xmm0
- pcmpeqb %xmm7, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_32)
-
- bsf %ecx, %edx
- movdqu %xmm4, (%eax)
- movdqu %xmm5, 16(%eax)
- movdqu %xmm6, 32(%eax)
- add $48, %esi
- add $48, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
- .p2align 4
-L(BigN):
- pcmpeqb (%esi), %xmm1
- pmovmskb %xmm1, %edx
- shr %cl, %edx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail)
-
- pcmpeqb 16(%esi), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes)
-
- movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
- movdqu %xmm1, (%eax)
- sub %ecx, %eax
- sub $48, %ebx
- add %ecx, %ebx
-
- mov $16, %ecx
- movdqa (%esi, %ecx), %xmm1
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%eax, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
- jmp L(Unalign16BothBigN)
-# endif
-
-/*------------end of main part-------------------------------*/
-
-/* Case1 */
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %ecx, %eax
- add %ecx, %esi
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTail):
- add %ecx, %esi
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1):
- add $16, %esi
- add $16, %eax
-L(CopyFrom1To16BytesTail1):
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes):
- bsf %edx, %edx
- add %ecx, %esi
- add $16, %edx
- sub %ecx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
- bsf %ecx, %edx
- movdqu %xmm4, (%eax)
- add $16, %esi
- add $16, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
- bsf %edx, %edx
- movdqu %xmm4, (%eax)
- movdqu %xmm5, 16(%eax)
- add $32, %esi
- add $32, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-
- .p2align 4
-L(CopyFrom1To16BytesExit):
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2 */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %ecx, %eax
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- add $16, %edx
- sub %ecx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-/* Case2 or Case3, Case3 */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %ecx, %eax
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To32BytesCase2)
- sub %ecx, %ebx
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTailCase2)
- sub %ecx, %ebx
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
- add $16, %eax
- add $16, %esi
- sub $16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1Case2)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-# endif
-
-# ifdef USE_AS_STRNCAT
- .p2align 4
-L(StrncatExit0):
- movb %bh, (%eax)
- mov STR3(%esp), %eax
- RETURN
-# endif
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit1):
- movb %bh, 1(%eax)
-# endif
-L(Exit1):
-# ifdef USE_AS_STRNCAT
- movb (%esi), %dh
-# endif
- movb %dh, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit2):
- movb %bh, 2(%eax)
-# endif
-L(Exit2):
- movw (%esi), %dx
- movw %dx, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit3):
- movb %bh, 3(%eax)
-# endif
-L(Exit3):
- movw (%esi), %cx
- movw %cx, (%eax)
-# ifdef USE_AS_STRNCAT
- movb 2(%esi), %dh
-# endif
- movb %dh, 2(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit4):
- movb %bh, 4(%eax)
-# endif
-L(Exit4):
- movl (%esi), %edx
- movl %edx, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit5):
- movb %bh, 5(%eax)
-# endif
-L(Exit5):
- movl (%esi), %ecx
-# ifdef USE_AS_STRNCAT
- movb 4(%esi), %dh
-# endif
- movb %dh, 4(%eax)
- movl %ecx, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit6):
- movb %bh, 6(%eax)
-# endif
-L(Exit6):
- movl (%esi), %ecx
- movw 4(%esi), %dx
- movl %ecx, (%eax)
- movw %dx, 4(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit7):
- movb %bh, 7(%eax)
-# endif
-L(Exit7):
- movl (%esi), %ecx
- movl 3(%esi), %edx
- movl %ecx, (%eax)
- movl %edx, 3(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit8):
- movb %bh, 8(%eax)
-# endif
-L(Exit8):
- movlpd (%esi), %xmm0
- movlpd %xmm0, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit9):
- movb %bh, 9(%eax)
-# endif
-L(Exit9):
- movlpd (%esi), %xmm0
-# ifdef USE_AS_STRNCAT
- movb 8(%esi), %dh
-# endif
- movb %dh, 8(%eax)
- movlpd %xmm0, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit10):
- movb %bh, 10(%eax)
-# endif
-L(Exit10):
- movlpd (%esi), %xmm0
- movw 8(%esi), %dx
- movlpd %xmm0, (%eax)
- movw %dx, 8(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit11):
- movb %bh, 11(%eax)
-# endif
-L(Exit11):
- movlpd (%esi), %xmm0
- movl 7(%esi), %edx
- movlpd %xmm0, (%eax)
- movl %edx, 7(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit12):
- movb %bh, 12(%eax)
-# endif
-L(Exit12):
- movlpd (%esi), %xmm0
- movl 8(%esi), %edx
- movlpd %xmm0, (%eax)
- movl %edx, 8(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit13):
- movb %bh, 13(%eax)
-# endif
-L(Exit13):
- movlpd (%esi), %xmm0
- movlpd 5(%esi), %xmm1
- movlpd %xmm0, (%eax)
- movlpd %xmm1, 5(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit14):
- movb %bh, 14(%eax)
-# endif
-L(Exit14):
- movlpd (%esi), %xmm0
- movlpd 6(%esi), %xmm1
- movlpd %xmm0, (%eax)
- movlpd %xmm1, 6(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit15):
- movb %bh, 15(%eax)
-# endif
-L(Exit15):
- movlpd (%esi), %xmm0
- movlpd 7(%esi), %xmm1
- movlpd %xmm0, (%eax)
- movlpd %xmm1, 7(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit16):
- movb %bh, 16(%eax)
-# endif
-L(Exit16):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit17):
- movb %bh, 17(%eax)
-# endif
-L(Exit17):
- movdqu (%esi), %xmm0
-# ifdef USE_AS_STRNCAT
- movb 16(%esi), %dh
-# endif
- movdqu %xmm0, (%eax)
- movb %dh, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit18):
- movb %bh, 18(%eax)
-# endif
-L(Exit18):
- movdqu (%esi), %xmm0
- movw 16(%esi), %cx
- movdqu %xmm0, (%eax)
- movw %cx, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit19):
- movb %bh, 19(%eax)
-# endif
-L(Exit19):
- movdqu (%esi), %xmm0
- movl 15(%esi), %ecx
- movdqu %xmm0, (%eax)
- movl %ecx, 15(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit20):
- movb %bh, 20(%eax)
-# endif
-L(Exit20):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%eax)
- movl %ecx, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit21):
- movb %bh, 21(%eax)
-# endif
-L(Exit21):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
-# ifdef USE_AS_STRNCAT
- movb 20(%esi), %dh
-# endif
- movdqu %xmm0, (%eax)
- movl %ecx, 16(%eax)
- movb %dh, 20(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit22):
- movb %bh, 22(%eax)
-# endif
-L(Exit22):
- movdqu (%esi), %xmm0
- movlpd 14(%esi), %xmm3
- movdqu %xmm0, (%eax)
- movlpd %xmm3, 14(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit23):
- movb %bh, 23(%eax)
-# endif
-L(Exit23):
- movdqu (%esi), %xmm0
- movlpd 15(%esi), %xmm3
- movdqu %xmm0, (%eax)
- movlpd %xmm3, 15(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit24):
- movb %bh, 24(%eax)
-# endif
-L(Exit24):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit25):
- movb %bh, 25(%eax)
-# endif
-L(Exit25):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
-# ifdef USE_AS_STRNCAT
- movb 24(%esi), %dh
-# endif
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movb %dh, 24(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit26):
- movb %bh, 26(%eax)
-# endif
-L(Exit26):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movw 24(%esi), %cx
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movw %cx, 24(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit27):
- movb %bh, 27(%eax)
-# endif
-L(Exit27):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 23(%esi), %ecx
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movl %ecx, 23(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit28):
- movb %bh, 28(%eax)
-# endif
-L(Exit28):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 24(%esi), %ecx
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movl %ecx, 24(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit29):
- movb %bh, 29(%eax)
-# endif
-L(Exit29):
- movdqu (%esi), %xmm0
- movdqu 13(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 13(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit30):
- movb %bh, 30(%eax)
-# endif
-L(Exit30):
- movdqu (%esi), %xmm0
- movdqu 14(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 14(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit31):
- movb %bh, 31(%eax)
-# endif
-L(Exit31):
- movdqu (%esi), %xmm0
- movdqu 15(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 15(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit32):
- movb %bh, 32(%eax)
-# endif
-L(Exit32):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
-# ifdef USE_AS_STRNCAT
-
- .p2align 4
-L(UnalignedLeaveCase2OrCase3):
- test %edx, %edx
- jnz L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
- lea 64(%ebx), %ecx
- and $-16, %ecx
- add $48, %ebx
- jl L(CopyFrom1To16BytesCase3)
- movdqu %xmm4, (%eax)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm5, 16(%eax)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm6, 32(%eax)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm7, 48(%eax)
- xor %bh, %bh
- movb %bh, 64(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-L(Unaligned64LeaveCase2):
- xor %ecx, %ecx
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $48, %ebx
- jle L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm4, (%eax)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm5, 16(%eax)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm6, 32(%eax)
- lea 16(%eax, %ecx), %eax
- lea 16(%esi, %ecx), %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-# endif
- .p2align 4
-L(ExitZero):
- RETURN
-
-END (STRCAT)
-
- .p2align 4
- .section .rodata
-L(ExitTable):
- .int JMPTBL(L(Exit1), L(ExitTable))
- .int JMPTBL(L(Exit2), L(ExitTable))
- .int JMPTBL(L(Exit3), L(ExitTable))
- .int JMPTBL(L(Exit4), L(ExitTable))
- .int JMPTBL(L(Exit5), L(ExitTable))
- .int JMPTBL(L(Exit6), L(ExitTable))
- .int JMPTBL(L(Exit7), L(ExitTable))
- .int JMPTBL(L(Exit8), L(ExitTable))
- .int JMPTBL(L(Exit9), L(ExitTable))
- .int JMPTBL(L(Exit10), L(ExitTable))
- .int JMPTBL(L(Exit11), L(ExitTable))
- .int JMPTBL(L(Exit12), L(ExitTable))
- .int JMPTBL(L(Exit13), L(ExitTable))
- .int JMPTBL(L(Exit14), L(ExitTable))
- .int JMPTBL(L(Exit15), L(ExitTable))
- .int JMPTBL(L(Exit16), L(ExitTable))
- .int JMPTBL(L(Exit17), L(ExitTable))
- .int JMPTBL(L(Exit18), L(ExitTable))
- .int JMPTBL(L(Exit19), L(ExitTable))
- .int JMPTBL(L(Exit20), L(ExitTable))
- .int JMPTBL(L(Exit21), L(ExitTable))
- .int JMPTBL(L(Exit22), L(ExitTable))
- .int JMPTBL(L(Exit23), L(ExitTable))
- .int JMPTBL(L(Exit24), L(ExitTable))
- .int JMPTBL(L(Exit25), L(ExitTable))
- .int JMPTBL(L(Exit26), L(ExitTable))
- .int JMPTBL(L(Exit27), L(ExitTable))
- .int JMPTBL(L(Exit28), L(ExitTable))
- .int JMPTBL(L(Exit29), L(ExitTable))
- .int JMPTBL(L(Exit30), L(ExitTable))
- .int JMPTBL(L(Exit31), L(ExitTable))
- .int JMPTBL(L(Exit32), L(ExitTable))
-# ifdef USE_AS_STRNCAT
-L(ExitStrncatTable):
- .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S
deleted file mode 100644
index 72bc49c..0000000
--- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S
+++ /dev/null
@@ -1,572 +0,0 @@
-/* strcat with SSSE3
- Copyright (C) 2011 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCAT
-# define STRCAT __strcat_ssse3
-# endif
-
-# define PARMS 4
-# define STR1 PARMS+4
-# define STR2 STR1+4
-
-# ifdef USE_AS_STRNCAT
-# define LEN STR2+8
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
- PUSH (%edi)
- mov STR1(%esp), %edi
- mov %edi, %edx
-
-# define RETURN jmp L(StartStrcpyPart)
-# include "strlen-sse2.S"
-
-L(StartStrcpyPart):
- mov STR2(%esp), %ecx
- lea (%edi, %eax), %edx
-# ifdef USE_AS_STRNCAT
- PUSH (%ebx)
- mov LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(StrncatExit0)
- cmp $8, %ebx
- jbe L(StrncatExit8Bytes)
-# endif
- cmpb $0, (%ecx)
- jz L(Exit1)
- cmpb $0, 1(%ecx)
- jz L(Exit2)
- cmpb $0, 2(%ecx)
- jz L(Exit3)
- cmpb $0, 3(%ecx)
- jz L(Exit4)
- cmpb $0, 4(%ecx)
- jz L(Exit5)
- cmpb $0, 5(%ecx)
- jz L(Exit6)
- cmpb $0, 6(%ecx)
- jz L(Exit7)
- cmpb $0, 7(%ecx)
- jz L(Exit8)
- cmpb $0, 8(%ecx)
- jz L(Exit9)
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- jb L(StrncatExit15Bytes)
-# endif
- cmpb $0, 9(%ecx)
- jz L(Exit10)
- cmpb $0, 10(%ecx)
- jz L(Exit11)
- cmpb $0, 11(%ecx)
- jz L(Exit12)
- cmpb $0, 12(%ecx)
- jz L(Exit13)
- cmpb $0, 13(%ecx)
- jz L(Exit14)
- cmpb $0, 14(%ecx)
- jz L(Exit15)
- cmpb $0, 15(%ecx)
- jz L(Exit16)
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- je L(StrncatExit16)
-
-# define RETURN1 \
- POP (%ebx); \
- POP (%edi); \
- ret; \
- CFI_PUSH (%ebx); \
- CFI_PUSH (%edi)
-# define USE_AS_STRNCPY
-# else
-# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
-# endif
-# include "strcpy-ssse3.S"
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(ExitHigh):
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- movlpd (%ecx), %xmm0
- movlpd 8(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit1):
- movb %bh, 1(%edx)
-L(Exit1):
- movb (%ecx), %al
- movb %al, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit2):
- movb %bh, 2(%edx)
-L(Exit2):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit3):
- movb %bh, 3(%edx)
-L(Exit3):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit4):
- movb %bh, 4(%edx)
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit5):
- movb %bh, 5(%edx)
-L(Exit5):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit6):
- movb %bh, 6(%edx)
-L(Exit6):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit7):
- movb %bh, 7(%edx)
-L(Exit7):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit8):
- movb %bh, 8(%edx)
-L(Exit8):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit9):
- movb %bh, 9(%edx)
-L(Exit9):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movb 8(%ecx), %al
- movb %al, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit10):
- movb %bh, 10(%edx)
-L(Exit10):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movw 8(%ecx), %ax
- movw %ax, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit11):
- movb %bh, 11(%edx)
-L(Exit11):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 7(%ecx), %eax
- movl %eax, 7(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit12):
- movb %bh, 12(%edx)
-L(Exit12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit13):
- movb %bh, 13(%edx)
-L(Exit13):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit14):
- movb %bh, 14(%edx)
-L(Exit14):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit15):
- movb %bh, 15(%edx)
-L(Exit15):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit16):
- movb %bh, 16(%edx)
-L(Exit16):
- movlpd (%ecx), %xmm0
- movlpd 8(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
- movl %edi, %eax
- RETURN1
-
-# ifdef USE_AS_STRNCPY
-
- CFI_PUSH(%esi)
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %esi, %ecx
- lea (%esi, %edx), %esi
- lea -9(%ebx), %edx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%esi), %edx
- POP (%esi)
- jz L(ExitHighCase2)
-
- test $0x01, %al
- jnz L(Exit1)
- cmp $1, %ebx
- je L(StrncatExit1)
- test $0x02, %al
- jnz L(Exit2)
- cmp $2, %ebx
- je L(StrncatExit2)
- test $0x04, %al
- jnz L(Exit3)
- cmp $3, %ebx
- je L(StrncatExit3)
- test $0x08, %al
- jnz L(Exit4)
- cmp $4, %ebx
- je L(StrncatExit4)
- test $0x10, %al
- jnz L(Exit5)
- cmp $5, %ebx
- je L(StrncatExit5)
- test $0x20, %al
- jnz L(Exit6)
- cmp $6, %ebx
- je L(StrncatExit6)
- test $0x40, %al
- jnz L(Exit7)
- cmp $7, %ebx
- je L(StrncatExit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- lea 7(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
- xor %cl, %cl
- movb %cl, (%eax)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(ExitHighCase2):
- test $0x01, %ah
- jnz L(Exit9)
- cmp $9, %ebx
- je L(StrncatExit9)
- test $0x02, %ah
- jnz L(Exit10)
- cmp $10, %ebx
- je L(StrncatExit10)
- test $0x04, %ah
- jnz L(Exit11)
- cmp $11, %ebx
- je L(StrncatExit11)
- test $0x8, %ah
- jnz L(Exit12)
- cmp $12, %ebx
- je L(StrncatExit12)
- test $0x10, %ah
- jnz L(Exit13)
- cmp $13, %ebx
- je L(StrncatExit13)
- test $0x20, %ah
- jnz L(Exit14)
- cmp $14, %ebx
- je L(StrncatExit14)
- test $0x40, %ah
- jnz L(Exit15)
- cmp $15, %ebx
- je L(StrncatExit15)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm1
- movlpd %xmm1, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- CFI_PUSH(%esi)
-
-L(CopyFrom1To16BytesCase2OrCase3):
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
-
- .p2align 4
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
-
- cmp $8, %ebx
- ja L(ExitHighCase3)
- cmp $1, %ebx
- je L(StrncatExit1)
- cmp $2, %ebx
- je L(StrncatExit2)
- cmp $3, %ebx
- je L(StrncatExit3)
- cmp $4, %ebx
- je L(StrncatExit4)
- cmp $5, %ebx
- je L(StrncatExit5)
- cmp $6, %ebx
- je L(StrncatExit6)
- cmp $7, %ebx
- je L(StrncatExit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movb %bh, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(ExitHighCase3):
- cmp $9, %ebx
- je L(StrncatExit9)
- cmp $10, %ebx
- je L(StrncatExit10)
- cmp $11, %ebx
- je L(StrncatExit11)
- cmp $12, %ebx
- je L(StrncatExit12)
- cmp $13, %ebx
- je L(StrncatExit13)
- cmp $14, %ebx
- je L(StrncatExit14)
- cmp $15, %ebx
- je L(StrncatExit15)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm1
- movlpd %xmm1, 8(%edx)
- movb %bh, 16(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit0):
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit15Bytes):
- cmp $9, %ebx
- je L(StrncatExit9)
- cmpb $0, 9(%ecx)
- jz L(Exit10)
- cmp $10, %ebx
- je L(StrncatExit10)
- cmpb $0, 10(%ecx)
- jz L(Exit11)
- cmp $11, %ebx
- je L(StrncatExit11)
- cmpb $0, 11(%ecx)
- jz L(Exit12)
- cmp $12, %ebx
- je L(StrncatExit12)
- cmpb $0, 12(%ecx)
- jz L(Exit13)
- cmp $13, %ebx
- je L(StrncatExit13)
- cmpb $0, 13(%ecx)
- jz L(Exit14)
- cmp $14, %ebx
- je L(StrncatExit14)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
- lea 14(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
- movb %bh, (%eax)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit8Bytes):
- cmpb $0, (%ecx)
- jz L(Exit1)
- cmp $1, %ebx
- je L(StrncatExit1)
- cmpb $0, 1(%ecx)
- jz L(Exit2)
- cmp $2, %ebx
- je L(StrncatExit2)
- cmpb $0, 2(%ecx)
- jz L(Exit3)
- cmp $3, %ebx
- je L(StrncatExit3)
- cmpb $0, 3(%ecx)
- jz L(Exit4)
- cmp $4, %ebx
- je L(StrncatExit4)
- cmpb $0, 4(%ecx)
- jz L(Exit5)
- cmp $5, %ebx
- je L(StrncatExit5)
- cmpb $0, 5(%ecx)
- jz L(Exit6)
- cmp $6, %ebx
- je L(StrncatExit6)
- cmpb $0, 6(%ecx)
- jz L(Exit7)
- cmp $7, %ebx
- je L(StrncatExit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- lea 7(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
- movb %bh, (%eax)
- movl %edi, %eax
- RETURN1
-
-# endif
-END (STRCAT)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
deleted file mode 100644
index e68feca..0000000
--- a/sysdeps/i386/i686/multiarch/strcat.S
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Multiple versions of strcat
- Copyright (C) 2011-2012 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-# define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3 __strncat_ssse3
-# define STRCAT_SSE2 __strncat_sse2
-# define STRCAT_IA32 __strncat_ia32
-# define __GI_STRCAT __GI_strncat
-#else
-# define STRCAT_SSSE3 __strcat_ssse3
-# define STRCAT_SSE2 __strcat_sse2
-# define STRCAT_IA32 __strcat_ia32
-# define __GI_STRCAT __GI_strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strncat in static library since we
- need strncat before the initialization happened. */
-#ifndef NOT_IN_libc
-
-# ifdef SHARED
- .text
-ENTRY(STRCAT)
- .type STRCAT, @gnu_indirect_function
- pushl %ebx
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
- LOAD_PIC_REG(bx)
- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
- jne 1f
- call __init_cpu_features
-1: leal STRCAT_IA32@GOTOFF(%ebx), %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal STRCAT_SSE2@GOTOFF(%ebx), %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
- jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
- jz 2f
- leal STRCAT_SSSE3@GOTOFF(%ebx), %eax
-2: popl %ebx
- cfi_adjust_cfa_offset (-4)
- cfi_restore (ebx)
- ret
-END(STRCAT)
-# else
-
-ENTRY(STRCAT)
- .type STRCAT, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features
- jne 1f
- call __init_cpu_features
-1: leal STRCAT_IA32, %eax
- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
- jz 2f
- leal STRCAT_SSE2, %eax
- testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
- jnz 2f
- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
- jz 2f
- leal STRCAT_SSSE3, %eax
-2: ret
-END(STRCAT)
-
-# endif
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCAT_IA32, @function; \
- .align 16; \
- STRCAT_IA32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
-
-# endif
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../../i486/strcat.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c b/sysdeps/i386/i686/multiarch/strncat-c.c
deleted file mode 100644
index 132a000..0000000
--- a/sysdeps/i386/i686/multiarch/strncat-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCAT __strncat_ia32
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
- __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
-#endif
-
-#include "string/strncat.c"
diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S b/sysdeps/i386/i686/multiarch/strncat-sse2.S
deleted file mode 100644
index f1045b7..0000000
--- a/sysdeps/i386/i686/multiarch/strncat-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT __strncat_sse2
-#define USE_AS_STRNCAT
-
-#include "strcat-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/sysdeps/i386/i686/multiarch/strncat-ssse3.S
deleted file mode 100644
index 625b90a..0000000
--- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT __strncat_ssse3
-#define USE_AS_STRNCAT
-
-#include "strcat-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat.S b/sysdeps/i386/i686/multiarch/strncat.S
deleted file mode 100644
index fd569c2..0000000
--- a/sysdeps/i386/i686/multiarch/strncat.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/powerpc/strcat.c b/sysdeps/powerpc/strcat.c
deleted file mode 100644
index 28575d0..0000000
--- a/sysdeps/powerpc/strcat.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* strcat version that uses fast strcpy/strlen.
- Copyright (C) 1997, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <string.h>
-
-#undef strcat
-
-/* Append SRC on the end of DEST. */
-char *
-strcat (char *dest, const char *src)
-{
- strcpy (dest + strlen (dest), src);
- return dest;
-}
-libc_hidden_builtin_def (strcat)
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 22f1435..ae94366 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -6,7 +6,7 @@ endif
ifeq ($(subdir),string)
-sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
+sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
@@ -14,8 +14,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
- strcat-sse2-unaligned strncat-sse2-unaligned \
- strcat-ssse3 strncat-ssse3 strlen_atom strlen_avx \
+ strlen_atom strlen_avx \
strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
memcmp-ssse3
ifeq (yes,$(config-cflags-sse4))
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
deleted file mode 100644
index 7811ab5..0000000
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/* strcat with SSE2
- Copyright (C) 2011 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# ifndef STRCAT
-# define STRCAT __strcat_sse2_unaligned
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
- mov %rdi, %r9
-# ifdef USE_AS_STRNCAT
- mov %rdx, %r8
-# endif
-
-# define RETURN jmp L(StartStrcpyPart)
-# include "strlen-sse2-pminub.S"
-# undef RETURN
-
-L(StartStrcpyPart):
- lea (%r9, %rax), %rdi
- mov %rsi, %rcx
- mov %r9, %rax /* save result */
-
-# ifdef USE_AS_STRNCAT
- test %r8, %r8
- jz L(ExitZero)
-# define USE_AS_STRNCPY
-# endif
-
-# include "strcpy-sse2-unaligned.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
deleted file mode 100644
index abd2c0c..0000000
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ /dev/null
@@ -1,557 +0,0 @@
-/* strcat with SSSE3
- Copyright (C) 2011 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# ifndef STRCAT
-# define STRCAT __strcat_ssse3
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
-# ifdef USE_AS_STRNCAT
- mov %rdx, %r8
-# endif
-
-# define RETURN jmp L(StartStrcpyPart)
-# include "strlen-sse2-no-bsf.S"
-
-# undef RETURN
-
-L(StartStrcpyPart):
- mov %rsi, %rcx
- lea (%rdi, %rax), %rdx
-# ifdef USE_AS_STRNCAT
- test %r8, %r8
- jz L(StrncatExit0)
- cmp $8, %r8
- jbe L(StrncatExit8Bytes)
-# endif
- cmpb $0, (%rcx)
- jz L(Exit1)
- cmpb $0, 1(%rcx)
- jz L(Exit2)
- cmpb $0, 2(%rcx)
- jz L(Exit3)
- cmpb $0, 3(%rcx)
- jz L(Exit4)
- cmpb $0, 4(%rcx)
- jz L(Exit5)
- cmpb $0, 5(%rcx)
- jz L(Exit6)
- cmpb $0, 6(%rcx)
- jz L(Exit7)
- cmpb $0, 7(%rcx)
- jz L(Exit8)
- cmpb $0, 8(%rcx)
- jz L(Exit9)
-# ifdef USE_AS_STRNCAT
- cmp $16, %r8
- jb L(StrncatExit15Bytes)
-# endif
- cmpb $0, 9(%rcx)
- jz L(Exit10)
- cmpb $0, 10(%rcx)
- jz L(Exit11)
- cmpb $0, 11(%rcx)
- jz L(Exit12)
- cmpb $0, 12(%rcx)
- jz L(Exit13)
- cmpb $0, 13(%rcx)
- jz L(Exit14)
- cmpb $0, 14(%rcx)
- jz L(Exit15)
- cmpb $0, 15(%rcx)
- jz L(Exit16)
-# ifdef USE_AS_STRNCAT
- cmp $16, %r8
- je L(StrncatExit16)
-# define USE_AS_STRNCPY
-# endif
-
-# include "strcpy-ssse3.S"
-
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %rsi, %rdx
- add %rsi, %rcx
-
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(ExitHigh):
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- movlpd (%rcx), %xmm0
- movlpd 8(%rcx), %xmm1
- movlpd %xmm0, (%rdx)
- movlpd %xmm1, 8(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit1):
- xor %ah, %ah
- movb %ah, 1(%rdx)
-L(Exit1):
- movb (%rcx), %al
- movb %al, (%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit2):
- xor %ah, %ah
- movb %ah, 2(%rdx)
-L(Exit2):
- movw (%rcx), %ax
- movw %ax, (%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit3):
- xor %ah, %ah
- movb %ah, 3(%rdx)
-L(Exit3):
- movw (%rcx), %ax
- movw %ax, (%rdx)
- movb 2(%rcx), %al
- movb %al, 2(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit4):
- xor %ah, %ah
- movb %ah, 4(%rdx)
-L(Exit4):
- mov (%rcx), %eax
- mov %eax, (%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit5):
- xor %ah, %ah
- movb %ah, 5(%rdx)
-L(Exit5):
- mov (%rcx), %eax
- mov %eax, (%rdx)
- movb 4(%rcx), %al
- movb %al, 4(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit6):
- xor %ah, %ah
- movb %ah, 6(%rdx)
-L(Exit6):
- mov (%rcx), %eax
- mov %eax, (%rdx)
- movw 4(%rcx), %ax
- movw %ax, 4(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit7):
- xor %ah, %ah
- movb %ah, 7(%rdx)
-L(Exit7):
- mov (%rcx), %eax
- mov %eax, (%rdx)
- mov 3(%rcx), %eax
- mov %eax, 3(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit8):
- xor %ah, %ah
- movb %ah, 8(%rdx)
-L(Exit8):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit9):
- xor %ah, %ah
- movb %ah, 9(%rdx)
-L(Exit9):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movb 8(%rcx), %al
- movb %al, 8(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit10):
- xor %ah, %ah
- movb %ah, 10(%rdx)
-L(Exit10):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movw 8(%rcx), %ax
- movw %ax, 8(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit11):
- xor %ah, %ah
- movb %ah, 11(%rdx)
-L(Exit11):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- mov 7(%rcx), %eax
- mov %eax, 7(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit12):
- xor %ah, %ah
- movb %ah, 12(%rdx)
-L(Exit12):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- mov 8(%rcx), %eax
- mov %eax, 8(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit13):
- xor %ah, %ah
- movb %ah, 13(%rdx)
-L(Exit13):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movlpd 5(%rcx), %xmm1
- movlpd %xmm1, 5(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit14):
- xor %ah, %ah
- movb %ah, 14(%rdx)
-L(Exit14):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movlpd 6(%rcx), %xmm1
- movlpd %xmm1, 6(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit15):
- xor %ah, %ah
- movb %ah, 15(%rdx)
-L(Exit15):
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movlpd 7(%rcx), %xmm1
- movlpd %xmm1, 7(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit16):
- xor %ah, %ah
- movb %ah, 16(%rdx)
-L(Exit16):
- movlpd (%rcx), %xmm0
- movlpd 8(%rcx), %xmm1
- movlpd %xmm0, (%rdx)
- movlpd %xmm1, 8(%rdx)
- mov %rdi, %rax
- ret
-
-# ifdef USE_AS_STRNCPY
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %r8
- add %rsi, %rcx
- lea (%rsi, %rdx), %rsi
- lea -9(%r8), %rdx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%rsi), %rdx
- jz L(ExitHighCase2)
-
- test $0x01, %al
- jnz L(Exit1)
- cmp $1, %r8
- je L(StrncatExit1)
- test $0x02, %al
- jnz L(Exit2)
- cmp $2, %r8
- je L(StrncatExit2)
- test $0x04, %al
- jnz L(Exit3)
- cmp $3, %r8
- je L(StrncatExit3)
- test $0x08, %al
- jnz L(Exit4)
- cmp $4, %r8
- je L(StrncatExit4)
- test $0x10, %al
- jnz L(Exit5)
- cmp $5, %r8
- je L(StrncatExit5)
- test $0x20, %al
- jnz L(Exit6)
- cmp $6, %r8
- je L(StrncatExit6)
- test $0x40, %al
- jnz L(Exit7)
- cmp $7, %r8
- je L(StrncatExit7)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- lea 7(%rdx), %rax
- cmpb $1, (%rax)
- sbb $-1, %rax
- xor %cl, %cl
- movb %cl, (%rax)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(ExitHighCase2):
- test $0x01, %ah
- jnz L(Exit9)
- cmp $9, %r8
- je L(StrncatExit9)
- test $0x02, %ah
- jnz L(Exit10)
- cmp $10, %r8
- je L(StrncatExit10)
- test $0x04, %ah
- jnz L(Exit11)
- cmp $11, %r8
- je L(StrncatExit11)
- test $0x8, %ah
- jnz L(Exit12)
- cmp $12, %r8
- je L(StrncatExit12)
- test $0x10, %ah
- jnz L(Exit13)
- cmp $13, %r8
- je L(StrncatExit13)
- test $0x20, %ah
- jnz L(Exit14)
- cmp $14, %r8
- je L(StrncatExit14)
- test $0x40, %ah
- jnz L(Exit15)
- cmp $15, %r8
- je L(StrncatExit15)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movlpd 8(%rcx), %xmm1
- movlpd %xmm1, 8(%rdx)
- mov %rdi, %rax
- ret
-
-L(CopyFrom1To16BytesCase2OrCase3):
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
-
- .p2align 4
-L(CopyFrom1To16BytesCase3):
- add $16, %r8
- add %rsi, %rdx
- add %rsi, %rcx
-
- cmp $8, %r8
- ja L(ExitHighCase3)
- cmp $1, %r8
- je L(StrncatExit1)
- cmp $2, %r8
- je L(StrncatExit2)
- cmp $3, %r8
- je L(StrncatExit3)
- cmp $4, %r8
- je L(StrncatExit4)
- cmp $5, %r8
- je L(StrncatExit5)
- cmp $6, %r8
- je L(StrncatExit6)
- cmp $7, %r8
- je L(StrncatExit7)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- xor %ah, %ah
- movb %ah, 8(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(ExitHighCase3):
- cmp $9, %r8
- je L(StrncatExit9)
- cmp $10, %r8
- je L(StrncatExit10)
- cmp $11, %r8
- je L(StrncatExit11)
- cmp $12, %r8
- je L(StrncatExit12)
- cmp $13, %r8
- je L(StrncatExit13)
- cmp $14, %r8
- je L(StrncatExit14)
- cmp $15, %r8
- je L(StrncatExit15)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movlpd 8(%rcx), %xmm1
- movlpd %xmm1, 8(%rdx)
- xor %ah, %ah
- movb %ah, 16(%rdx)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit0):
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit15Bytes):
- cmp $9, %r8
- je L(StrncatExit9)
- cmpb $0, 9(%rcx)
- jz L(Exit10)
- cmp $10, %r8
- je L(StrncatExit10)
- cmpb $0, 10(%rcx)
- jz L(Exit11)
- cmp $11, %r8
- je L(StrncatExit11)
- cmpb $0, 11(%rcx)
- jz L(Exit12)
- cmp $12, %r8
- je L(StrncatExit12)
- cmpb $0, 12(%rcx)
- jz L(Exit13)
- cmp $13, %r8
- je L(StrncatExit13)
- cmpb $0, 13(%rcx)
- jz L(Exit14)
- cmp $14, %r8
- je L(StrncatExit14)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- movlpd 7(%rcx), %xmm1
- movlpd %xmm1, 7(%rdx)
- lea 14(%rdx), %rax
- cmpb $1, (%rax)
- sbb $-1, %rax
- xor %cl, %cl
- movb %cl, (%rax)
- mov %rdi, %rax
- ret
-
- .p2align 4
-L(StrncatExit8Bytes):
- cmpb $0, (%rcx)
- jz L(Exit1)
- cmp $1, %r8
- je L(StrncatExit1)
- cmpb $0, 1(%rcx)
- jz L(Exit2)
- cmp $2, %r8
- je L(StrncatExit2)
- cmpb $0, 2(%rcx)
- jz L(Exit3)
- cmp $3, %r8
- je L(StrncatExit3)
- cmpb $0, 3(%rcx)
- jz L(Exit4)
- cmp $4, %r8
- je L(StrncatExit4)
- cmpb $0, 4(%rcx)
- jz L(Exit5)
- cmp $5, %r8
- je L(StrncatExit5)
- cmpb $0, 5(%rcx)
- jz L(Exit6)
- cmp $6, %r8
- je L(StrncatExit6)
- cmpb $0, 6(%rcx)
- jz L(Exit7)
- cmp $7, %r8
- je L(StrncatExit7)
- movlpd (%rcx), %xmm0
- movlpd %xmm0, (%rdx)
- lea 7(%rdx), %rax
- cmpb $1, (%rax)
- sbb $-1, %rax
- xor %cl, %cl
- movb %cl, (%rax)
- mov %rdi, %rax
- ret
-
-# endif
-END (STRCAT)
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
deleted file mode 100644
index 0c256de..0000000
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Multiple versions of strcat
- Copyright (C) 2009, 2011 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-# define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3 __strncat_ssse3
-# define STRCAT_SSE2 __strncat_sse2
-# define STRCAT_SSE2_UNALIGNED __strncat_sse2_unaligned
-# define __GI_STRCAT __GI_strncat
-# define __GI___STRCAT __GI___strncat
-#else
-# define STRCAT_SSSE3 __strcat_ssse3
-# define STRCAT_SSE2 __strcat_sse2
-# define STRCAT_SSE2_UNALIGNED __strcat_sse2_unaligned
-# define __GI_STRCAT __GI_strcat
-# define __GI___STRCAT __GI___strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc. */
-#ifndef NOT_IN_libc
- .text
-ENTRY(STRCAT)
- .type STRCAT, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
- jnz 2f
- leaq STRCAT_SSE2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
- jz 2f
- leaq STRCAT_SSSE3(%rip), %rax
-2: ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCAT_SSE2, @function; \
- .align 16; \
- STRCAT_SSE2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../strcat.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
deleted file mode 100644
index a3cdbff..0000000
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCAT __strncat_sse2
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
- __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
-#endif
-
-#include "string/strncat.c"
diff --git a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
deleted file mode 100644
index 133e1d2..0000000
--- a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCAT
-#define STRCAT __strncat_sse2_unaligned
-#include "strcat-sse2-unaligned.S"
diff --git a/sysdeps/x86_64/multiarch/strncat-ssse3.S b/sysdeps/x86_64/multiarch/strncat-ssse3.S
deleted file mode 100644
index 6c45ff3..0000000
--- a/sysdeps/x86_64/multiarch/strncat-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCAT
-#define STRCAT __strncat_ssse3
-#include "strcat-ssse3.S"
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
deleted file mode 100644
index fd569c2..0000000
--- a/sysdeps/x86_64/multiarch/strncat.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
deleted file mode 100644
index 535a18d..0000000
--- a/sysdeps/x86_64/strcat.S
+++ /dev/null
@@ -1,259 +0,0 @@
-/* strcat(dest, src) -- Append SRC on the end of DEST.
- Optimized for x86-64.
- Copyright (C) 2002 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Andreas Jaeger <aj@suse.de>, 2002.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-
- .text
-ENTRY (BP_SYM (strcat))
- movq %rdi, %rcx /* Dest. register. */
- andl $7, %ecx /* mask alignment bits */
- movq %rdi, %rax /* Duplicate destination pointer. */
- movq $0xfefefefefefefeff,%r8
-
- /* First step: Find end of destination. */
- jz 4f /* aligned => start loop */
-
- neg %ecx /* We need to align to 8 bytes. */
- addl $8,%ecx
- /* Search the first bytes directly. */
-0: cmpb $0x0,(%rax) /* is byte NUL? */
- je 2f /* yes => start copy */
- incq %rax /* increment pointer */
- decl %ecx
- jnz 0b
-
-
-
- /* Now the source is aligned. Scan for NUL byte. */
- .p2align 4
-4:
- /* First unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Second unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Third unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Fourth unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jz 4b /* no NUL found => continue loop */
-
- .p2align 4 /* Align, it's a jump target. */
-3: subq $8,%rax /* correct pointer increment. */
-
- testb %cl, %cl /* is first byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testl $0x00ff0000, %ecx /* is third byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
- testl $0xff000000, %ecx /* is fourth byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
- shrq $32, %rcx /* look at other half. */
-
- testb %cl, %cl /* is first byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testl $0xff0000, %ecx /* is third byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
-2:
- /* Second step: Copy source to destination. */
-
- movq %rsi, %rcx /* duplicate */
- andl $7,%ecx /* mask alignment bits */
- movq %rax, %rdx /* move around */
- jz 22f /* aligned => start loop */
-
- neg %ecx /* align to 8 bytes. */
- addl $8, %ecx
- /* Align the source pointer. */
-21:
- movb (%rsi), %al /* Fetch a byte */
- testb %al, %al /* Is it NUL? */
- movb %al, (%rdx) /* Store it */
- jz 24f /* If it was NUL, done! */
- incq %rsi
- incq %rdx
- decl %ecx
- jnz 21b
-
- /* Now the sources is aligned. Unfortunatly we cannot force
- to have both source and destination aligned, so ignore the
- alignment of the destination. */
- .p2align 4
-22:
- /* 1st unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 23f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 23f /* found NUL => return pointer */
-
- movq %rax, (%rdx) /* Write value to destination. */
- addq $8, %rdx /* Adjust pointer. */
-
- /* 2nd unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 23f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 23f /* found NUL => return pointer */
-
- movq %rax, (%rdx) /* Write value to destination. */
- addq $8, %rdx /* Adjust pointer. */
-
- /* 3rd unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 23f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 23f /* found NUL => return pointer */
-
- movq %rax, (%rdx) /* Write value to destination. */
- addq $8, %rdx /* Adjust pointer. */
-
- /* 4th unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 23f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 23f /* found NUL => return pointer */
-
- movq %rax, (%rdx) /* Write value to destination. */
- addq $8, %rdx /* Adjust pointer. */
- jmp 22b /* Next iteration. */
-
- /* Do the last few bytes. %rax contains the value to write.
- The loop is unrolled twice. */
- .p2align 4
-23:
- movb %al, (%rdx) /* 1st byte. */
- testb %al, %al /* Is it NUL. */
- jz 24f /* yes, finish. */
- incq %rdx /* Increment destination. */
- movb %ah, (%rdx) /* 2nd byte. */
- testb %ah, %ah /* Is it NUL?. */
- jz 24f /* yes, finish. */
- incq %rdx /* Increment destination. */
- shrq $16, %rax /* Shift... */
- jmp 23b /* and look at next two bytes in %rax. */
-
-
-24:
- movq %rdi, %rax /* Source is return value. */
- retq
-END (BP_SYM (strcat))
-libc_hidden_builtin_def (strcat)
--
1.7.4.4