This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] faster strcat


This is the next version of my patch
http://sourceware.org/ml/libc-alpha/2012-06/msg00489.html

I investigated strcat a bit further, and the speed degradation
was caused by improper use of indirect functions.

The strcat ifunc first tests bit_Fast_Unaligned_Load, which is
false on Core 2 and AMD processors. Then it checks for SSSE3 and
calls the ssse3 version.
But strcat_ssse3 inlines strlen_sse2_no_bsf, which on Core 2 and Phenom II
is the slowest strlen variant unless the strings are longer than 2000, where
strlen_sse2 takes the lead.

Then I deleted the strcat variants that are no longer needed.

The files ports/sysdeps/ia64/strcat.c and sysdeps/powerpc/strcat.c became
duplicates of string/strcat.c.


	* string/strcat.c: Reduce algorithm selection
	  to strlen, strcpy.
	* string/strncat.c: Likewise.
	* sysdeps/powerpc/strcat.c: Duplicated string/strcat.c.
	* ports/sysdeps/ia64/strcat.c: Likewise.

	* sysdeps/i386/i686/multiarch/Makefile: Updated.
	* sysdeps/x86_64/multiarch/Makefile: Updated.

	* sysdeps/i386/i486/strcat.S: No longer needed.
	* sysdeps/i386/i686/multiarch/strcat-sse2.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcat-ssse3.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcat.S: Likewise.
	* sysdeps/i386/i686/multiarch/strncat-c.c: Likewise.
	* sysdeps/i386/i686/multiarch/strncat-sse2.S: Likewise.
	* sysdeps/i386/i686/multiarch/strncat-ssse3.S: Likewise.
	* sysdeps/i386/i686/multiarch/strncat.S: Likewise.
	* sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S: Likewise.
	* sysdeps/x86_64/multiarch/strcat-ssse3.S: Likewise.
	* sysdeps/x86_64/multiarch/strcat.S: Likewise.
	* sysdeps/x86_64/multiarch/strncat-c.c: Likewise.
	* sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S: Likewise.
	* sysdeps/x86_64/multiarch/strncat-ssse3.S: Likewise.
	* sysdeps/x86_64/multiarch/strncat.S: Likewise.
	* sysdeps/x86_64/strcat.S: Likewise.


---
 ports/sysdeps/ia64/strcat.c                       |   26 -
 string/strcat.c                                   |   29 +-
 string/strncat.c                                  |   62 +-
 sysdeps/i386/i486/strcat.S                        |  273 -----
 sysdeps/i386/i686/multiarch/Makefile              |    3 +-
 sysdeps/i386/i686/multiarch/strcat-sse2.S         | 1243 ---------------------
 sysdeps/i386/i686/multiarch/strcat-ssse3.S        |  572 ----------
 sysdeps/i386/i686/multiarch/strcat.S              |  119 --
 sysdeps/i386/i686/multiarch/strncat-c.c           |    8 -
 sysdeps/i386/i686/multiarch/strncat-sse2.S        |    4 -
 sysdeps/i386/i686/multiarch/strncat-ssse3.S       |    4 -
 sysdeps/i386/i686/multiarch/strncat.S             |    3 -
 sysdeps/powerpc/strcat.c                          |   30 -
 sysdeps/x86_64/multiarch/Makefile                 |    5 +-
 sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S  |   53 -
 sysdeps/x86_64/multiarch/strcat-ssse3.S           |  557 ---------
 sysdeps/x86_64/multiarch/strcat.S                 |   84 --
 sysdeps/x86_64/multiarch/strncat-c.c              |    8 -
 sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S |    3 -
 sysdeps/x86_64/multiarch/strncat-ssse3.S          |    3 -
 sysdeps/x86_64/multiarch/strncat.S                |    3 -
 sysdeps/x86_64/strcat.S                           |  259 -----
 24 files changed, 15 insertions(+), 4280 deletions(-)
 delete mode 100644 ports/sysdeps/ia64/strcat.c
 delete mode 100644 sysdeps/i386/i486/strcat.S
 delete mode 100644 sysdeps/i386/i686/multiarch/strcat-sse2.S
 delete mode 100644 sysdeps/i386/i686/multiarch/strcat-ssse3.S
 delete mode 100644 sysdeps/i386/i686/multiarch/strcat.S
 delete mode 100644 sysdeps/i386/i686/multiarch/strncat-c.c
 delete mode 100644 sysdeps/i386/i686/multiarch/strncat-sse2.S
 delete mode 100644 sysdeps/i386/i686/multiarch/strncat-ssse3.S
 delete mode 100644 sysdeps/i386/i686/multiarch/strncat.S
 delete mode 100644 sysdeps/powerpc/strcat.c
 delete mode 100644 sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
 delete mode 100644 sysdeps/x86_64/multiarch/strcat-ssse3.S
 delete mode 100644 sysdeps/x86_64/multiarch/strcat.S
 delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
 delete mode 100644 sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
 delete mode 100644 sysdeps/x86_64/multiarch/strncat-ssse3.S
 delete mode 100644 sysdeps/x86_64/multiarch/strncat.S
 delete mode 100644 sysdeps/x86_64/strcat.S

diff --git a/ports/sysdeps/ia64/strcat.c b/ports/sysdeps/ia64/strcat.c
deleted file mode 100644
index 53cd4d1..0000000
--- a/ports/sysdeps/ia64/strcat.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (C) 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-
-char *
-strcat (char *dest, const char *src)
-{
-  strcpy (dest + strlen (dest), src);
-  return dest;
-}
-libc_hidden_builtin_def (strcat)
diff --git a/string/strcat.c b/string/strcat.c
index f9e4bc6..28575d0 100644
--- a/string/strcat.c
+++ b/string/strcat.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+/* strcat version that uses fast strcpy/strlen.
+   Copyright (C) 1997, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,36 +17,14 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
-#include <memcopy.h>
 
 #undef strcat
 
 /* Append SRC on the end of DEST.  */
 char *
-strcat (dest, src)
-     char *dest;
-     const char *src;
+strcat (char *dest, const char *src)
 {
-  char *s1 = dest;
-  const char *s2 = src;
-  char c;
-
-  /* Find the end of the string.  */
-  do
-    c = *s1++;
-  while (c != '\0');
-
-  /* Make S1 point before the next character, so we can increment
-     it while memory is read (wins on pipelined cpus).  */
-  s1 -= 2;
-
-  do
-    {
-      c = *s2++;
-      *++s1 = c;
-    }
-  while (c != '\0');
-
+  strcpy (dest + strlen (dest), src);
   return dest;
 }
 libc_hidden_builtin_def (strcat)
diff --git a/string/strncat.c b/string/strncat.c
index dcfb04d..17b4c9a 100644
--- a/string/strncat.c
+++ b/string/strncat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1997,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,66 +17,20 @@
 
 #include <string.h>
 
-#ifdef _LIBC
-# include <memcopy.h>
-#endif
-
 #ifndef STRNCAT
 # undef strncat
 # define STRNCAT  strncat
 #endif
 
 char *
-STRNCAT (char *s1, const char *s2, size_t n)
+STRNCAT (char *dest, const char *src, size_t n)
 {
-  char c;
-  char *s = s1;
-
-  /* Find the end of S1.  */
-  do
-    c = *s1++;
-  while (c != '\0');
-
-  /* Make S1 point before next character, so we can increment
-     it while memory is read (wins on pipelined cpus).  */
-  s1 -= 2;
-
-  if (n >= 4)
-    {
-      size_t n4 = n >> 2;
-      do
-	{
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	} while (--n4 > 0);
-      n &= 3;
-    }
-
-  while (n > 0)
-    {
-      c = *s2++;
-      *++s1 = c;
-      if (c == '\0')
-	return s;
-      n--;
-    }
+  size_t dest_len = strlen (dest);
+  size_t src_len  = strnlen (src , n);
 
-  if (c != '\0')
-    *++s1 = '\0';
+  if (src_len == n)
+    {
+     memcpy (dest + dest_len, src, n);
+     dest[dest_len + n] = '\0';
+    }
+  else
+    strcpy (dest + dest_len, src);
 
-  return s;
+  return dest;
 }
diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
deleted file mode 100644
index 7596a0d..0000000
--- a/sysdeps/i386/i486/strcat.S
+++ /dev/null
@@ -1,273 +0,0 @@
-/* strcat(dest, src) -- Append SRC on the end of DEST.
-   For Intel 80x86, x>=4.
-   Copyright (C) 1994-1997,2000,2003,2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>.
-   Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-#define PARMS	LINKAGE+4	/* space for 1 saved reg */
-#define RTN	PARMS
-#define DEST	RTN+RTN_SIZE
-#define SRC	DEST+PTR_SIZE
-
-	.text
-ENTRY (BP_SYM (strcat))
-	ENTER
-
-	pushl %edi		/* Save callee-safe register.  */
-	cfi_adjust_cfa_offset (4)
-
-	movl DEST(%esp), %edx
-	movl SRC(%esp), %ecx
-	CHECK_BOUNDS_LOW (%edx, DEST(%esp))
-	CHECK_BOUNDS_LOW (%ecx, SRC(%esp))
-
-	testb $0xff, (%ecx)	/* Is source string empty? */
-	jz L(8)			/* yes => return */
-
-	/* Test the first bytes separately until destination is aligned.  */
-	testl $3, %edx		/* destination pointer aligned? */
-	jz L(1)			/* yes => begin scan loop */
-	testb $0xff, (%edx)	/* is end of string? */
-	jz L(2)			/* yes => start appending */
-	incl %edx		/* increment source pointer */
-
-	testl $3, %edx		/* destination pointer aligned? */
-	jz L(1)			/* yes => begin scan loop */
-	testb $0xff, (%edx)	/* is end of string? */
-	jz L(2)			/* yes => start appending */
-	incl %edx		/* increment source pointer */
-
-	testl $3, %edx		/* destination pointer aligned? */
-	jz L(1)			/* yes => begin scan loop */
-	testb $0xff, (%edx)	/* is end of string? */
-	jz L(2)			/* yes => start appending */
-	incl %edx		/* increment source pointer */
-
-	/* Now we are aligned.  Begin scan loop.  */
-	jmp L(1)
-
-	cfi_rel_offset (edi, 0)
-	ALIGN(4)
-
-L(4):	addl $16,%edx		/* increment destination pointer for round */
-
-L(1):	movl (%edx), %eax	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-
-	/* If you compare this with the algorithm in memchr.S you will
-	   notice that here is an `xorl' statement missing.  But you must
-	   not forget that we are looking for C == 0 and `xorl $0, %eax'
-	   is a no-op.  */
-
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-
-	/* According to the algorithm we had to reverse the effect of the
-	   XOR first and then test the overflow bits.  But because the
-	   following XOR would destroy the carry flag and it would (in a
-	   representation with more than 32 bits) not alter then last
-	   overflow, we can now test this condition.  If no carry is signaled
-	   no overflow must have occurred in the last byte => it was 0.	*/
-	jnc L(3)
-
-	/* We are only interested in carry bits that change due to the
-	   previous add, so remove original bits */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-
-	/* Now test for the other three overflow bits.  */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	/* If at least one byte of the word is C we don't get 0 in %ecx.  */
-	jnz L(3)
-
-	movl 4(%edx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(5)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(5)		/* one byte is NUL => stop copying */
-
-	movl 8(%edx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(6)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(6)		/* one byte is NUL => stop copying */
-
-	movl 12(%edx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(7)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jz L(4)			/* no byte is NUL => carry on copying */
-
-L(7):	addl $4, %edx		/* adjust source pointer */
-L(6):	addl $4, %edx
-L(5):	addl $4, %edx
-
-L(3):	testb %al, %al		/* is first byte NUL? */
-	jz L(2)			/* yes => start copying */
-	incl %edx		/* increment source pointer */
-
-	testb %ah, %ah		/* is second byte NUL? */
-	jz L(2)			/* yes => start copying */
-	incl %edx		/* increment source pointer */
-
-	testl $0xff0000, %eax	/* is third byte NUL? */
-	jz L(2)			/* yes => start copying */
-	incl %edx		/* increment source pointer */
-
-L(2):	subl %ecx, %edx		/* reduce number of loop variants */
-
-	/* Now we have to align the source pointer.  */
-	testl $3, %ecx		/* pointer correctly aligned? */
-	jz L(29)		/* yes => start copy loop */
-	movb (%ecx), %al	/* get first byte */
-	movb %al, (%ecx,%edx)	/* and store it */
-	andb %al, %al		/* is byte NUL? */
-	jz L(8)			/* yes => return */
-	incl %ecx		/* increment pointer */
-
-	testl $3, %ecx		/* pointer correctly aligned? */
-	jz L(29)		/* yes => start copy loop */
-	movb (%ecx), %al	/* get first byte */
-	movb %al, (%ecx,%edx)	/* and store it */
-	andb %al, %al		/* is byte NUL? */
-	jz L(8)			/* yes => return */
-	incl %ecx		/* increment pointer */
-
-	testl $3, %ecx		/* pointer correctly aligned? */
-	jz L(29)		/* yes => start copy loop */
-	movb (%ecx), %al	/* get first byte */
-	movb %al, (%ecx,%edx)	/* and store it */
-	andb %al, %al		/* is byte NUL? */
-	jz L(8)			/* yes => return */
-	incl %ecx		/* increment pointer */
-
-	/* Now we are aligned.  */
-	jmp L(29)		/* start copy loop */
-
-	ALIGN(4)
-
-L(28):	movl %eax, 12(%ecx,%edx)/* store word at destination */
-	addl $16, %ecx		/* adjust pointer for full round */
-
-L(29):	movl (%ecx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(9)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(9)		/* one byte is NUL => stop copying */
-	movl %eax, (%ecx,%edx)	/* store word to destination */
-
-	movl 4(%ecx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(91)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(91)		/* one byte is NUL => stop copying */
-	movl %eax, 4(%ecx,%edx)	/* store word to destination */
-
-	movl 8(%ecx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(92)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(92)		/* one byte is NUL => stop copying */
-	movl %eax, 8(%ecx,%edx)	/* store word to destination */
-
-	movl 12(%ecx), %eax	/* get word from source */
-	movl $0xfefefeff, %edi	/* magic value */
-	addl %eax, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(93)		/* highest byte is C => stop copying */
-	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jz L(28)		/* no is NUL => carry on copying */
-
-L(93):	addl $4, %ecx		/* adjust pointer */
-L(92):	addl $4, %ecx
-L(91):	addl $4, %ecx
-
-L(9):	movb %al, (%ecx,%edx)	/* store first byte of last word */
-	orb %al, %al		/* is it NUL? */
-	jz L(8)			/* yes => return */
-
-	movb %ah, 1(%ecx,%edx)	/* store second byte of last word */
-	orb %ah, %ah		/* is it NUL? */
-	jz L(8)			/* yes => return */
-
-	shrl $16, %eax		/* make upper bytes accessible */
-	movb %al, 2(%ecx,%edx)	/* store third byte of last word */
-	orb %al, %al		/* is it NUL? */
-	jz L(8)			/* yes => return */
-
-	movb %ah, 3(%ecx,%edx)	/* store fourth byte of last word */
-
-L(8):	/* GKM FIXME: check high bounds */
-	movl DEST(%esp), %eax	/* start address of destination is result */
-	RETURN_BOUNDED_POINTER (DEST(%esp))
-	popl %edi		/* restore saved register */
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	LEAVE
-	RET_PTR
-END (BP_SYM (strcat))
-libc_hidden_builtin_def (strcat)
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 8946bfa..92a2b8f 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -14,8 +14,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
 		   strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
 		   strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
-		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
-		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
+		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 \
 		   strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
 		   memchr-sse2 memchr-sse2-bsf \
 		   memrchr-sse2 memrchr-sse2-bsf memrchr-c \
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
deleted file mode 100644
index e75f92c..0000000
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ /dev/null
@@ -1,1243 +0,0 @@
-/* strcat with SSE2
-   Copyright (C) 2011-2012 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifdef SHARED
-#  define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
-	jump table with relative offsets.  INDEX is a register contains the
-	index into the jump table.   SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	/* We first load PC into ECX.  */	\
-	SETUP_PIC_REG(cx);	\
-	/* Get the address of the jump table.  */	\
-	addl	$(TABLE - .), %ecx;	\
-	/* Get the entry and convert the relative offset to the	\
-	absolute address.  */	\
-	addl	(%ecx,INDEX,SCALE), %ecx;	\
-	/* We loaded the jump table and adjuested ECX. Go.  */	\
-	jmp	*%ecx
-# else
-#  define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute offsets.  INDEX is a register contains the index into the
-	jump table.  SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	jmp	*TABLE(,INDEX,SCALE)
-# endif
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_sse2
-# endif
-
-# define PARMS  4
-# define STR1  PARMS+4
-# define STR2  STR1+4
-
-# ifdef USE_AS_STRNCAT
-#  define LEN    STR2+8
-#  define STR3   STR1+4
-# else
-#  define STR3   STR1
-# endif
-
-# define USE_AS_STRCAT
-# ifdef USE_AS_STRNCAT
-#  define RETURN  POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
-# else
-#  define RETURN  POP(%esi); ret; CFI_PUSH(%esi);
-# endif
-
-.text
-ENTRY (STRCAT)
-	PUSH	(%esi)
-	mov	STR1(%esp), %eax
-	mov	STR2(%esp), %esi
-# ifdef USE_AS_STRNCAT
-	PUSH	(%ebx)
-	movl	LEN(%esp), %ebx
-	test	%ebx, %ebx
-	jz	L(ExitZero)
-# endif
-	cmpb	$0, (%esi)
-	mov	%esi, %ecx
-	mov	%eax, %edx
-	jz	L(ExitZero)
-
-	and	$63, %ecx
-	and	$63, %edx
-	cmp	$32, %ecx
-	ja	L(StrlenCore7_1)
-	cmp	$48, %edx
-	ja	L(alignment_prolog)
-
-	pxor	%xmm0, %xmm0
-	pxor	%xmm4, %xmm4
-	pxor	%xmm7, %xmm7
-	movdqu	(%eax), %xmm1
-	movdqu	(%esi), %xmm5
-	pcmpeqb	%xmm1, %xmm0
-	movdqu	16(%esi), %xmm6
-	pmovmskb %xmm0, %ecx
-	pcmpeqb	%xmm5, %xmm4
-	pcmpeqb	%xmm6, %xmm7
-	test	%ecx, %ecx
-	jnz	L(exit_less16_)
-	mov	%eax, %ecx
-	and	$-16, %eax
-	jmp	L(loop_prolog)
-
-L(alignment_prolog):
-	pxor	%xmm0, %xmm0
-	pxor	%xmm4, %xmm4
-	mov	%edx, %ecx
-	pxor	%xmm7, %xmm7
-	and	$15, %ecx
-	and	$-16, %eax
-	pcmpeqb	(%eax), %xmm0
-	movdqu	(%esi), %xmm5
-	movdqu	16(%esi), %xmm6
-	pmovmskb %xmm0, %edx
-	pcmpeqb	%xmm5, %xmm4
-	shr	%cl, %edx
-	pcmpeqb	%xmm6, %xmm7
-	test	%edx, %edx
-	jnz	L(exit_less16)
-	add	%eax, %ecx
-
-	pxor	%xmm0, %xmm0
-L(loop_prolog):
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	.p2align 4
-L(align16_loop):
-	pcmpeqb	16(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(align16_loop)
-	bsf	%edx, %edx
-	add	%edx, %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit16):
-	bsf	%edx, %edx
-	lea	16(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit32):
-	bsf	%edx, %edx
-	lea	32(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit48):
-	bsf	%edx, %edx
-	lea	48(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit_less16):
-	bsf	%edx, %edx
-	add	%ecx, %eax
-	add	%edx, %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit_less16_):
-	bsf	%ecx, %ecx
-	add	%ecx, %eax
-
-	.p2align 4
-L(StartStrcpyPart):
-	pmovmskb %xmm4, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1)
-
-	movdqu	%xmm5, (%eax)
-	pmovmskb %xmm7, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes1)
-
-	mov	%esi, %ecx
-	and	$-16, %esi
-	and	$15, %ecx
-	pxor	%xmm0, %xmm0
-# ifdef USE_AS_STRNCAT
-	add	%ecx, %ebx
-# endif
-	sub	%ecx, %eax
-	jmp	L(Unalign16Both)
-
-L(StrlenCore7_1):
-	mov	%eax, %ecx
-	pxor	%xmm0, %xmm0
-	and	$15, %ecx
-	and	$-16, %eax
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	shr	%cl, %edx
-	test	%edx, %edx
-	jnz	L(exit_less16_1)
-	add	%eax, %ecx
-
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-
-	.p2align 4
-L(align16_loop_1):
-	pcmpeqb	16(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16_1)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32_1)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48_1)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(align16_loop_1)
-	bsf	%edx, %edx
-	add	%edx, %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit16_1):
-	bsf	%edx, %edx
-	lea	16(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit32_1):
-	bsf	%edx, %edx
-	lea	32(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit48_1):
-	bsf	%edx, %edx
-	lea	48(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit_less16_1):
-	bsf	%edx, %edx
-	add	%ecx, %eax
-	add	%edx, %eax
-
-	.p2align 4
-L(StartStrcpyPart_1):
-	mov	%esi, %ecx
-	and	$15, %ecx
-	and	$-16, %esi
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-
-# ifdef USE_AS_STRNCAT
-	cmp	$48, %ebx
-	ja      L(BigN)
-# endif
-	pcmpeqb	(%esi), %xmm1
-# ifdef USE_AS_STRNCAT
-	add	%ecx, %ebx
-# endif
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
-	movdqu	%xmm1, (%eax)
-	sub	%ecx, %eax
-
-	.p2align 4
-L(Unalign16Both):
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$48, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-L(Unalign16BothBigN):
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%eax, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm4
-	movdqu	%xmm3, (%eax, %ecx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm1
-	movdqu	%xmm4, (%eax, %ecx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%eax, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm3, (%eax, %ecx)
-	mov	%esi, %edx
-	lea	16(%esi, %ecx), %esi
-	and	$-0x40, %esi
-	sub	%esi, %edx
-	sub	%edx, %eax
-# ifdef USE_AS_STRNCAT
-	lea	128(%ebx, %edx), %ebx
-# endif
-	movaps	(%esi), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%esi), %xmm5
-	movaps	32(%esi), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%esi), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(Unaligned64Leave)
-
-	.p2align 4
-L(Unaligned64Loop_start):
-	add	$64, %eax
-	add	$64, %esi
-	movdqu	%xmm4, -64(%eax)
-	movaps	(%esi), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%eax)
-	movaps	16(%esi), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%esi), %xmm3
-	movdqu	%xmm6, -32(%eax)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%eax)
-	movaps	48(%esi), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jz	L(Unaligned64Loop_start)
-
-L(Unaligned64Leave):
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	%xmm4, %xmm0
-	pcmpeqb	%xmm5, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_0)
-	test	%ecx, %ecx
-	jnz	L(CopyFrom1To16BytesUnaligned_16)
-
-	pcmpeqb	%xmm6, %xmm0
-	pcmpeqb	%xmm7, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_32)
-
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%eax)
-	movdqu	%xmm5, 16(%eax)
-	movdqu	%xmm6, 32(%eax)
-	add	$48, %esi
-	add	$48, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-	.p2align 4
-L(BigN):
-	pcmpeqb	(%esi), %xmm1
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
-	movdqu	%xmm1, (%eax)
-	sub	%ecx, %eax
-	sub     $48, %ebx
-	add     %ecx, %ebx
-
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-	jmp	L(Unalign16BothBigN)
-# endif
-
-/*------------end of main part-------------------------------*/
-
-/* Case1 */
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%ecx, %eax
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTail):
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1):
-	add	$16, %esi
-	add	$16, %eax
-L(CopyFrom1To16BytesTail1):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes):
-	bsf	%edx, %edx
-	add	%ecx, %esi
-	add	$16, %edx
-	sub	%ecx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%eax)
-	add	$16, %esi
-	add	$16, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
-	bsf	%edx, %edx
-	movdqu	%xmm4, (%eax)
-	movdqu	%xmm5, 16(%eax)
-	add	$32, %esi
-	add	$32, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-
-	.p2align 4
-L(CopyFrom1To16BytesExit):
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%ecx, %eax
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	add	$16, %edx
-	sub	%ecx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-/* Case2 or Case3,  Case3 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%ecx, %eax
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32BytesCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTailCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
-	add	$16, %eax
-	add	$16, %esi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1Case2)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-# endif
-
-# ifdef USE_AS_STRNCAT
-	.p2align 4
-L(StrncatExit0):
-	movb	%bh, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-# endif
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit1):
-	movb	%bh, 1(%eax)
-# endif
-L(Exit1):
-# ifdef USE_AS_STRNCAT
-	movb	(%esi), %dh
-# endif
-	movb	%dh, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit2):
-	movb	%bh, 2(%eax)
-# endif
-L(Exit2):
-	movw	(%esi), %dx
-	movw	%dx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit3):
-	movb	%bh, 3(%eax)
-# endif
-L(Exit3):
-	movw	(%esi), %cx
-	movw	%cx, (%eax)
-# ifdef USE_AS_STRNCAT
-	movb	2(%esi), %dh
-# endif
-	movb	%dh, 2(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit4):
-	movb	%bh, 4(%eax)
-# endif
-L(Exit4):
-	movl	(%esi), %edx
-	movl	%edx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit5):
-	movb	%bh, 5(%eax)
-# endif
-L(Exit5):
-	movl	(%esi), %ecx
-# ifdef USE_AS_STRNCAT
-	movb	4(%esi), %dh
-# endif
-	movb	%dh, 4(%eax)
-	movl	%ecx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit6):
-	movb	%bh, 6(%eax)
-# endif
-L(Exit6):
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%eax)
-	movw	%dx, 4(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit7):
-	movb	%bh, 7(%eax)
-# endif
-L(Exit7):
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx
-	movl	%ecx, (%eax)
-	movl	%edx, 3(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit8):
-	movb	%bh, 8(%eax)
-# endif
-L(Exit8):
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit9):
-	movb	%bh, 9(%eax)
-# endif
-L(Exit9):
-	movlpd	(%esi), %xmm0
-# ifdef USE_AS_STRNCAT
-	movb	8(%esi), %dh
-# endif
-	movb	%dh, 8(%eax)
-	movlpd	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit10):
-	movb	%bh, 10(%eax)
-# endif
-L(Exit10):
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%eax)
-	movw	%dx, 8(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit11):
-	movb	%bh, 11(%eax)
-# endif
-L(Exit11):
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx
-	movlpd	%xmm0, (%eax)
-	movl	%edx, 7(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit12):
-	movb	%bh, 12(%eax)
-# endif
-L(Exit12):
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%eax)
-	movl	%edx, 8(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit13):
-	movb	%bh, 13(%eax)
-# endif
-L(Exit13):
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 5(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit14):
-	movb	%bh, 14(%eax)
-# endif
-L(Exit14):
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 6(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit15):
-	movb	%bh, 15(%eax)
-# endif
-L(Exit15):
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 7(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit16):
-	movb	%bh, 16(%eax)
-# endif
-L(Exit16):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit17):
-	movb	%bh, 17(%eax)
-# endif
-L(Exit17):
-	movdqu	(%esi), %xmm0
-# ifdef USE_AS_STRNCAT
-	movb	16(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movb	%dh, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit18):
-	movb	%bh, 18(%eax)
-# endif
-L(Exit18):
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%eax)
-	movw	%cx, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit19):
-	movb	%bh, 19(%eax)
-# endif
-L(Exit19):
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit20):
-	movb	%bh, 20(%eax)
-# endif
-L(Exit20):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit21):
-	movb	%bh, 21(%eax)
-# endif
-L(Exit21):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-# ifdef USE_AS_STRNCAT
-	movb	20(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 16(%eax)
-	movb	%dh, 20(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit22):
-	movb	%bh, 22(%eax)
-# endif
-L(Exit22):
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm3, 14(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit23):
-	movb	%bh, 23(%eax)
-# endif
-L(Exit23):
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm3, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit24):
-	movb	%bh, 24(%eax)
-# endif
-L(Exit24):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit25):
-	movb	%bh, 25(%eax)
-# endif
-L(Exit25):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-# ifdef USE_AS_STRNCAT
-	movb	24(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movb	%dh, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit26):
-	movb	%bh, 26(%eax)
-# endif
-L(Exit26):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movw	%cx, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit27):
-	movb	%bh, 27(%eax)
-# endif
-L(Exit27):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movl	%ecx, 23(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit28):
-	movb	%bh, 28(%eax)
-# endif
-L(Exit28):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movl	%ecx, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit29):
-	movb	%bh, 29(%eax)
-# endif
-L(Exit29):
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 13(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit30):
-	movb	%bh, 30(%eax)
-# endif
-L(Exit30):
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 14(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit31):
-	movb	%bh, 31(%eax)
-# endif
-L(Exit31):
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit32):
-	movb	%bh, 32(%eax)
-# endif
-L(Exit32):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-# ifdef USE_AS_STRNCAT
-
-	.p2align 4
-L(UnalignedLeaveCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
-	lea	64(%ebx), %ecx
-	and	$-16, %ecx
-	add	$48, %ebx
-	jl	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm4, (%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm5, 16(%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm6, 32(%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm7, 48(%eax)
-	xor	%bh, %bh
-	movb	%bh, 64(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-L(Unaligned64LeaveCase2):
-	xor	%ecx, %ecx
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm4, (%eax)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm5, 16(%eax)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm6, 32(%eax)
-	lea	16(%eax, %ecx), %eax
-	lea	16(%esi, %ecx), %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-# endif
-	.p2align 4
-L(ExitZero):
-	RETURN
-
-END (STRCAT)
-
-	.p2align 4
-	.section .rodata
-L(ExitTable):
-	.int	JMPTBL(L(Exit1), L(ExitTable))
-	.int	JMPTBL(L(Exit2), L(ExitTable))
-	.int	JMPTBL(L(Exit3), L(ExitTable))
-	.int	JMPTBL(L(Exit4), L(ExitTable))
-	.int	JMPTBL(L(Exit5), L(ExitTable))
-	.int	JMPTBL(L(Exit6), L(ExitTable))
-	.int	JMPTBL(L(Exit7), L(ExitTable))
-	.int	JMPTBL(L(Exit8), L(ExitTable))
-	.int	JMPTBL(L(Exit9), L(ExitTable))
-	.int	JMPTBL(L(Exit10), L(ExitTable))
-	.int	JMPTBL(L(Exit11), L(ExitTable))
-	.int	JMPTBL(L(Exit12), L(ExitTable))
-	.int	JMPTBL(L(Exit13), L(ExitTable))
-	.int	JMPTBL(L(Exit14), L(ExitTable))
-	.int	JMPTBL(L(Exit15), L(ExitTable))
-	.int	JMPTBL(L(Exit16), L(ExitTable))
-	.int	JMPTBL(L(Exit17), L(ExitTable))
-	.int	JMPTBL(L(Exit18), L(ExitTable))
-	.int	JMPTBL(L(Exit19), L(ExitTable))
-	.int	JMPTBL(L(Exit20), L(ExitTable))
-	.int	JMPTBL(L(Exit21), L(ExitTable))
-	.int	JMPTBL(L(Exit22), L(ExitTable))
-	.int	JMPTBL(L(Exit23), L(ExitTable))
-	.int	JMPTBL(L(Exit24), L(ExitTable))
-	.int	JMPTBL(L(Exit25), L(ExitTable))
-	.int	JMPTBL(L(Exit26), L(ExitTable))
-	.int	JMPTBL(L(Exit27), L(ExitTable))
-	.int	JMPTBL(L(Exit28), L(ExitTable))
-	.int	JMPTBL(L(Exit29), L(ExitTable))
-	.int	JMPTBL(L(Exit30), L(ExitTable))
-	.int	JMPTBL(L(Exit31), L(ExitTable))
-	.int	JMPTBL(L(Exit32), L(ExitTable))
-# ifdef USE_AS_STRNCAT
-L(ExitStrncatTable):
-	.int	JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S
deleted file mode 100644
index 72bc49c..0000000
--- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S
+++ /dev/null
@@ -1,572 +0,0 @@
-/* strcat with SSSE3
-   Copyright (C) 2011 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-	MERCHANTABILITY	or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_ssse3
-# endif
-
-# define PARMS  4
-# define STR1  PARMS+4
-# define STR2  STR1+4
-
-# ifdef USE_AS_STRNCAT
-#  define LEN STR2+8
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
-	PUSH	(%edi)
-	mov	STR1(%esp), %edi
-	mov	%edi, %edx
-
-# define RETURN  jmp L(StartStrcpyPart)
-# include "strlen-sse2.S"
-
-L(StartStrcpyPart):
-	mov	STR2(%esp), %ecx
-	lea	(%edi, %eax), %edx
-# ifdef USE_AS_STRNCAT
-	PUSH	(%ebx)
-	mov	LEN(%esp), %ebx
-	test	%ebx, %ebx
-	jz	L(StrncatExit0)
-	cmp	$8, %ebx
-	jbe	L(StrncatExit8Bytes)
-# endif
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmpb	$0, 7(%ecx)
-	jz	L(Exit8)
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jb	L(StrncatExit15Bytes)
-# endif
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmpb	$0, 14(%ecx)
-	jz	L(Exit15)
-	cmpb	$0, 15(%ecx)
-	jz	L(Exit16)
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	je	L(StrncatExit16)
-
-#  define RETURN1	\
-	POP	(%ebx);	\
-	POP	(%edi);	\
-	ret;	\
-	CFI_PUSH	(%ebx);	\
-	CFI_PUSH	(%edi)
-#  define USE_AS_STRNCPY
-# else
-#  define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
-# endif
-# include "strcpy-ssse3.S"
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit1):
-	movb	%bh, 1(%edx)
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit2):
-	movb	%bh, 2(%edx)
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit3):
-	movb	%bh, 3(%edx)
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit4):
-	movb	%bh, 4(%edx)
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit5):
-	movb	%bh, 5(%edx)
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit6):
-	movb	%bh, 6(%edx)
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit7):
-	movb	%bh, 7(%edx)
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit8):
-	movb	%bh, 8(%edx)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit9):
-	movb	%bh, 9(%edx)
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit10):
-	movb	%bh, 10(%edx)
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit11):
-	movb	%bh, 11(%edx)
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit12):
-	movb	%bh, 12(%edx)
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit13):
-	movb	%bh, 13(%edx)
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit14):
-	movb	%bh, 14(%edx)
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit15):
-	movb	%bh, 15(%edx)
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit16):
-	movb	%bh, 16(%edx)
-L(Exit16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-# ifdef USE_AS_STRNCPY
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-	lea	(%esi, %edx), %esi
-	lea	-9(%ebx), %edx
-	and	$1<<7, %dh
-	or	%al, %dh
-	test	%dh, %dh
-	lea	(%esi), %edx
-	POP	(%esi)
-	jz	L(ExitHighCase2)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrncatExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrncatExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrncatExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrncatExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrncatExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrncatExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrncatExit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	lea	7(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-	xor	%cl, %cl
-	movb	%cl, (%eax)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHighCase2):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrncatExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrncatExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrncatExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrncatExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrncatExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrncatExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(StrncatExit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm1, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	CFI_PUSH(%esi)
-
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHighCase3)
-	cmp	$1, %ebx
-	je	L(StrncatExit1)
-	cmp	$2, %ebx
-	je	L(StrncatExit2)
-	cmp	$3, %ebx
-	je	L(StrncatExit3)
-	cmp	$4, %ebx
-	je	L(StrncatExit4)
-	cmp	$5, %ebx
-	je	L(StrncatExit5)
-	cmp	$6, %ebx
-	je	L(StrncatExit6)
-	cmp	$7, %ebx
-	je	L(StrncatExit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	%bh, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHighCase3):
-	cmp	$9, %ebx
-	je	L(StrncatExit9)
-	cmp	$10, %ebx
-	je	L(StrncatExit10)
-	cmp	$11, %ebx
-	je	L(StrncatExit11)
-	cmp	$12, %ebx
-	je	L(StrncatExit12)
-	cmp	$13, %ebx
-	je	L(StrncatExit13)
-	cmp	$14, %ebx
-	je	L(StrncatExit14)
-	cmp	$15, %ebx
-	je	L(StrncatExit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm1, 8(%edx)
-	movb	%bh, 16(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit0):
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit15Bytes):
-	cmp	$9, %ebx
-	je	L(StrncatExit9)
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrncatExit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrncatExit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrncatExit12)
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrncatExit13)
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrncatExit14)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-	lea	14(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-	movb	%bh, (%eax)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit8Bytes):
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrncatExit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrncatExit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrncatExit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrncatExit4)
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrncatExit5)
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrncatExit6)
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrncatExit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	lea	7(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-	movb	%bh, (%eax)
-	movl	%edi, %eax
-	RETURN1
-
-# endif
-END (STRCAT)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
deleted file mode 100644
index e68feca..0000000
--- a/sysdeps/i386/i686/multiarch/strcat.S
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Multiple versions of strcat
-   Copyright (C) 2011-2012 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-#  define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3	__strncat_ssse3
-# define STRCAT_SSE2		__strncat_sse2
-# define STRCAT_IA32		__strncat_ia32
-# define __GI_STRCAT		__GI_strncat
-#else
-# define STRCAT_SSSE3	__strcat_ssse3
-# define STRCAT_SSE2		__strcat_sse2
-# define STRCAT_IA32		__strcat_ia32
-# define __GI_STRCAT		__GI_strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strncat in static library since we
-   need strncat before the initialization happened.  */
-#ifndef NOT_IN_libc
-
-# ifdef SHARED
-	.text
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCAT_IA32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	STRCAT_SSE2@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
-	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	STRCAT_SSSE3@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(STRCAT)
-# else
-
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCAT_IA32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
-	jz	2f
-	leal	STRCAT_SSE2, %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
-	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	STRCAT_SSSE3, %eax
-2:	ret
-END(STRCAT)
-
-# endif
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCAT_IA32, @function; \
-	.align 16; \
-	STRCAT_IA32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
-#  undef libc_hidden_def
-#  define libc_hidden_def(name) \
-	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
-
-# endif
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../../i486/strcat.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c b/sysdeps/i386/i686/multiarch/strncat-c.c
deleted file mode 100644
index 132a000..0000000
--- a/sysdeps/i386/i686/multiarch/strncat-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCAT __strncat_ia32
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
-#endif
-
-#include "string/strncat.c"
diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S b/sysdeps/i386/i686/multiarch/strncat-sse2.S
deleted file mode 100644
index f1045b7..0000000
--- a/sysdeps/i386/i686/multiarch/strncat-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT  __strncat_sse2
-#define USE_AS_STRNCAT
-
-#include "strcat-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/sysdeps/i386/i686/multiarch/strncat-ssse3.S
deleted file mode 100644
index 625b90a..0000000
--- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT  __strncat_ssse3
-#define USE_AS_STRNCAT
-
-#include "strcat-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat.S b/sysdeps/i386/i686/multiarch/strncat.S
deleted file mode 100644
index fd569c2..0000000
--- a/sysdeps/i386/i686/multiarch/strncat.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/powerpc/strcat.c b/sysdeps/powerpc/strcat.c
deleted file mode 100644
index 28575d0..0000000
--- a/sysdeps/powerpc/strcat.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* strcat version that uses fast strcpy/strlen.
-   Copyright (C) 1997, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-
-#undef strcat
-
-/* Append SRC on the end of DEST.  */
-char *
-strcat (char *dest, const char *src)
-{
-  strcpy (dest + strlen (dest), src);
-  return dest;
-}
-libc_hidden_builtin_def (strcat)
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 22f1435..ae94366 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -6,7 +6,7 @@ endif
 
 ifeq ($(subdir),string)
 
-sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
+sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
@@ -14,8 +14,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
 		   strcpy-sse2-unaligned strncpy-sse2-unaligned \
 		   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
-		   strcat-sse2-unaligned strncat-sse2-unaligned \
-		   strcat-ssse3 strncat-ssse3 strlen_atom strlen_avx \
+		   strlen_atom strlen_avx \
 		   strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
 		   memcmp-ssse3
 ifeq (yes,$(config-cflags-sse4))
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
deleted file mode 100644
index 7811ab5..0000000
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/* strcat with SSE2
-   Copyright (C) 2011 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_sse2_unaligned
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
-	mov	%rdi, %r9
-# ifdef USE_AS_STRNCAT
-	mov	%rdx, %r8
-# endif
-
-# define RETURN  jmp L(StartStrcpyPart)
-# include "strlen-sse2-pminub.S"
-# undef RETURN
-
-L(StartStrcpyPart):
-	lea	(%r9, %rax), %rdi
-	mov	%rsi, %rcx
-	mov	%r9, %rax      /* save result */
-
-# ifdef USE_AS_STRNCAT
-	test	%r8, %r8
-	jz	L(ExitZero)
-#  define USE_AS_STRNCPY
-# endif
-
-# include "strcpy-sse2-unaligned.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
deleted file mode 100644
index abd2c0c..0000000
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ /dev/null
@@ -1,557 +0,0 @@
-/* strcat with SSSE3
-   Copyright (C) 2011 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef NOT_IN_libc
-
-# include <sysdep.h>
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_ssse3
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
-# ifdef USE_AS_STRNCAT
-	mov	%rdx, %r8
-# endif
-
-# define RETURN  jmp L(StartStrcpyPart)
-# include "strlen-sse2-no-bsf.S"
-
-# undef RETURN
-
-L(StartStrcpyPart):
-	mov	%rsi, %rcx
-	lea	(%rdi, %rax), %rdx
-# ifdef USE_AS_STRNCAT
-	test	%r8, %r8
-	jz	L(StrncatExit0)
-	cmp	$8, %r8
-	jbe	L(StrncatExit8Bytes)
-# endif
-	cmpb	$0, (%rcx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%rcx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%rcx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%rcx)
-	jz	L(Exit4)
-	cmpb	$0, 4(%rcx)
-	jz	L(Exit5)
-	cmpb	$0, 5(%rcx)
-	jz	L(Exit6)
-	cmpb	$0, 6(%rcx)
-	jz	L(Exit7)
-	cmpb	$0, 7(%rcx)
-	jz	L(Exit8)
-	cmpb	$0, 8(%rcx)
-	jz	L(Exit9)
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %r8
-	jb	L(StrncatExit15Bytes)
-# endif
-	cmpb	$0, 9(%rcx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%rcx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%rcx)
-	jz	L(Exit12)
-	cmpb	$0, 12(%rcx)
-	jz	L(Exit13)
-	cmpb	$0, 13(%rcx)
-	jz	L(Exit14)
-	cmpb	$0, 14(%rcx)
-	jz	L(Exit15)
-	cmpb	$0, 15(%rcx)
-	jz	L(Exit16)
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %r8
-	je	L(StrncatExit16)
-#  define USE_AS_STRNCPY
-# endif
-
-# include "strcpy-ssse3.S"
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%rsi, %rdx
-	add	%rsi, %rcx
-
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	movlpd	(%rcx), %xmm0
-	movlpd	8(%rcx), %xmm1
-	movlpd	%xmm0, (%rdx)
-	movlpd	%xmm1, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit1):
-	xor	%ah, %ah
-	movb	%ah, 1(%rdx)
-L(Exit1):
-	movb	(%rcx), %al
-	movb	%al, (%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit2):
-	xor	%ah, %ah
-	movb	%ah, 2(%rdx)
-L(Exit2):
-	movw	(%rcx), %ax
-	movw	%ax, (%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit3):
-	xor	%ah, %ah
-	movb	%ah, 3(%rdx)
-L(Exit3):
-	movw	(%rcx), %ax
-	movw	%ax, (%rdx)
-	movb	2(%rcx), %al
-	movb	%al, 2(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit4):
-	xor	%ah, %ah
-	movb	%ah, 4(%rdx)
-L(Exit4):
-	mov	(%rcx), %eax
-	mov	%eax, (%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit5):
-	xor	%ah, %ah
-	movb	%ah, 5(%rdx)
-L(Exit5):
-	mov	(%rcx), %eax
-	mov	%eax, (%rdx)
-	movb	4(%rcx), %al
-	movb	%al, 4(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit6):
-	xor	%ah, %ah
-	movb	%ah, 6(%rdx)
-L(Exit6):
-	mov	(%rcx), %eax
-	mov	%eax, (%rdx)
-	movw	4(%rcx), %ax
-	movw	%ax, 4(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit7):
-	xor	%ah, %ah
-	movb	%ah, 7(%rdx)
-L(Exit7):
-	mov	(%rcx), %eax
-	mov	%eax, (%rdx)
-	mov	3(%rcx), %eax
-	mov	%eax, 3(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit8):
-	xor	%ah, %ah
-	movb	%ah, 8(%rdx)
-L(Exit8):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit9):
-	xor	%ah, %ah
-	movb	%ah, 9(%rdx)
-L(Exit9):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movb	8(%rcx), %al
-	movb	%al, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit10):
-	xor	%ah, %ah
-	movb	%ah, 10(%rdx)
-L(Exit10):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movw	8(%rcx), %ax
-	movw	%ax, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit11):
-	xor	%ah, %ah
-	movb	%ah, 11(%rdx)
-L(Exit11):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	mov	7(%rcx), %eax
-	mov	%eax, 7(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit12):
-	xor	%ah, %ah
-	movb	%ah, 12(%rdx)
-L(Exit12):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	mov	8(%rcx), %eax
-	mov	%eax, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit13):
-	xor	%ah, %ah
-	movb	%ah, 13(%rdx)
-L(Exit13):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movlpd	5(%rcx), %xmm1
-	movlpd	%xmm1, 5(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit14):
-	xor	%ah, %ah
-	movb	%ah, 14(%rdx)
-L(Exit14):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movlpd	6(%rcx), %xmm1
-	movlpd	%xmm1, 6(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit15):
-	xor	%ah, %ah
-	movb	%ah, 15(%rdx)
-L(Exit15):
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movlpd	7(%rcx), %xmm1
-	movlpd	%xmm1, 7(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit16):
-	xor	%ah, %ah
-	movb	%ah, 16(%rdx)
-L(Exit16):
-	movlpd	(%rcx), %xmm0
-	movlpd	8(%rcx), %xmm1
-	movlpd	%xmm0, (%rdx)
-	movlpd	%xmm1, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-# ifdef USE_AS_STRNCPY
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %r8
-	add	%rsi, %rcx
-	lea	(%rsi, %rdx), %rsi
-	lea	-9(%r8), %rdx
-	and	$1<<7, %dh
-	or	%al, %dh
-	test	%dh, %dh
-	lea	(%rsi), %rdx
-	jz	L(ExitHighCase2)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %r8
-	je	L(StrncatExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %r8
-	je	L(StrncatExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %r8
-	je	L(StrncatExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %r8
-	je	L(StrncatExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %r8
-	je	L(StrncatExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %r8
-	je	L(StrncatExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %r8
-	je	L(StrncatExit7)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	lea	7(%rdx), %rax
-	cmpb	$1, (%rax)
-	sbb	$-1, %rax
-	xor	%cl, %cl
-	movb	%cl, (%rax)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(ExitHighCase2):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %r8
-	je	L(StrncatExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %r8
-	je	L(StrncatExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %r8
-	je	L(StrncatExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %r8
-	je	L(StrncatExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %r8
-	je	L(StrncatExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %r8
-	je	L(StrncatExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %r8
-	je	L(StrncatExit15)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movlpd	8(%rcx), %xmm1
-	movlpd	%xmm1, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%rax, %rax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %r8
-	add	%rsi, %rdx
-	add	%rsi, %rcx
-
-	cmp	$8, %r8
-	ja	L(ExitHighCase3)
-	cmp	$1, %r8
-	je	L(StrncatExit1)
-	cmp	$2, %r8
-	je	L(StrncatExit2)
-	cmp	$3, %r8
-	je	L(StrncatExit3)
-	cmp	$4, %r8
-	je	L(StrncatExit4)
-	cmp	$5, %r8
-	je	L(StrncatExit5)
-	cmp	$6, %r8
-	je	L(StrncatExit6)
-	cmp	$7, %r8
-	je	L(StrncatExit7)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	xor	%ah, %ah
-	movb	%ah, 8(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(ExitHighCase3):
-	cmp	$9, %r8
-	je	L(StrncatExit9)
-	cmp	$10, %r8
-	je	L(StrncatExit10)
-	cmp	$11, %r8
-	je	L(StrncatExit11)
-	cmp	$12, %r8
-	je	L(StrncatExit12)
-	cmp	$13, %r8
-	je	L(StrncatExit13)
-	cmp	$14, %r8
-	je	L(StrncatExit14)
-	cmp	$15, %r8
-	je	L(StrncatExit15)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movlpd	8(%rcx), %xmm1
-	movlpd	%xmm1, 8(%rdx)
-	xor	%ah, %ah
-	movb	%ah, 16(%rdx)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit0):
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit15Bytes):
-	cmp	$9, %r8
-	je	L(StrncatExit9)
-	cmpb	$0, 9(%rcx)
-	jz	L(Exit10)
-	cmp	$10, %r8
-	je	L(StrncatExit10)
-	cmpb	$0, 10(%rcx)
-	jz	L(Exit11)
-	cmp	$11, %r8
-	je	L(StrncatExit11)
-	cmpb	$0, 11(%rcx)
-	jz	L(Exit12)
-	cmp	$12, %r8
-	je	L(StrncatExit12)
-	cmpb	$0, 12(%rcx)
-	jz	L(Exit13)
-	cmp	$13, %r8
-	je	L(StrncatExit13)
-	cmpb	$0, 13(%rcx)
-	jz	L(Exit14)
-	cmp	$14, %r8
-	je	L(StrncatExit14)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	movlpd	7(%rcx), %xmm1
-	movlpd	%xmm1, 7(%rdx)
-	lea	14(%rdx), %rax
-	cmpb	$1, (%rax)
-	sbb	$-1, %rax
-	xor	%cl, %cl
-	movb	%cl, (%rax)
-	mov	%rdi, %rax
-	ret
-
-	.p2align 4
-L(StrncatExit8Bytes):
-	cmpb	$0, (%rcx)
-	jz	L(Exit1)
-	cmp	$1, %r8
-	je	L(StrncatExit1)
-	cmpb	$0, 1(%rcx)
-	jz	L(Exit2)
-	cmp	$2, %r8
-	je	L(StrncatExit2)
-	cmpb	$0, 2(%rcx)
-	jz	L(Exit3)
-	cmp	$3, %r8
-	je	L(StrncatExit3)
-	cmpb	$0, 3(%rcx)
-	jz	L(Exit4)
-	cmp	$4, %r8
-	je	L(StrncatExit4)
-	cmpb	$0, 4(%rcx)
-	jz	L(Exit5)
-	cmp	$5, %r8
-	je	L(StrncatExit5)
-	cmpb	$0, 5(%rcx)
-	jz	L(Exit6)
-	cmp	$6, %r8
-	je	L(StrncatExit6)
-	cmpb	$0, 6(%rcx)
-	jz	L(Exit7)
-	cmp	$7, %r8
-	je	L(StrncatExit7)
-	movlpd	(%rcx), %xmm0
-	movlpd	%xmm0, (%rdx)
-	lea	7(%rdx), %rax
-	cmpb	$1, (%rax)
-	sbb	$-1, %rax
-	xor	%cl, %cl
-	movb	%cl, (%rax)
-	mov	%rdi, %rax
-	ret
-
-# endif
-END (STRCAT)
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
deleted file mode 100644
index 0c256de..0000000
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Multiple versions of strcat
-   Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-#  define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3	         	__strncat_ssse3
-# define STRCAT_SSE2	            	__strncat_sse2
-# define STRCAT_SSE2_UNALIGNED    	__strncat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strncat
-# define __GI___STRCAT              __GI___strncat
-#else
-# define STRCAT_SSSE3	         	__strcat_ssse3
-# define STRCAT_SSE2	            	__strcat_sse2
-# define STRCAT_SSE2_UNALIGNED    	__strcat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strcat
-# define __GI___STRCAT              __GI___strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  */
-#ifndef NOT_IN_libc
-	.text
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	STRCAT_SSE2_UNALIGNED(%rip), %rax
-	testl	$bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
-	jnz	2f
-	leaq	STRCAT_SSE2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jz	2f
-	leaq	STRCAT_SSSE3(%rip), %rax
-2:	ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCAT_SSE2, @function; \
-	.align 16; \
-	STRCAT_SSE2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../strcat.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
deleted file mode 100644
index a3cdbff..0000000
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCAT __strncat_sse2
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
-#endif
-
-#include "string/strncat.c"
diff --git a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
deleted file mode 100644
index 133e1d2..0000000
--- a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCAT
-#define STRCAT __strncat_sse2_unaligned
-#include "strcat-sse2-unaligned.S"
diff --git a/sysdeps/x86_64/multiarch/strncat-ssse3.S b/sysdeps/x86_64/multiarch/strncat-ssse3.S
deleted file mode 100644
index 6c45ff3..0000000
--- a/sysdeps/x86_64/multiarch/strncat-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCAT
-#define STRCAT __strncat_ssse3
-#include "strcat-ssse3.S"
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
deleted file mode 100644
index fd569c2..0000000
--- a/sysdeps/x86_64/multiarch/strncat.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
deleted file mode 100644
index 535a18d..0000000
--- a/sysdeps/x86_64/strcat.S
+++ /dev/null
@@ -1,259 +0,0 @@
-/* strcat(dest, src) -- Append SRC on the end of DEST.
-   Optimized for x86-64.
-   Copyright (C) 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-
-	.text
-ENTRY (BP_SYM (strcat))
-	movq %rdi, %rcx		/* Dest. register. */
-	andl $7, %ecx		/* mask alignment bits */
-	movq %rdi, %rax		/* Duplicate destination pointer.  */
-	movq $0xfefefefefefefeff,%r8
-
-	/* First step: Find end of destination.  */
-	jz 4f			/* aligned => start loop */
-
-	neg %ecx		/* We need to align to 8 bytes.  */
-	addl $8,%ecx
-	/* Search the first bytes directly.  */
-0:	cmpb $0x0,(%rax)	/* is byte NUL? */
-	je 2f			/* yes => start copy */
-	incq %rax		/* increment pointer */
-	decl %ecx
-	jnz 0b
-
-
-
-	/* Now the source is aligned.  Scan for NUL byte.  */
-	.p2align 4
-4:
-	/* First unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Second unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Third unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Fourth unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jz 4b			/* no NUL found => continue loop */
-
-	.p2align 4		/* Align, it's a jump target.  */
-3:	subq $8,%rax		/* correct pointer increment.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0x00ff0000, %ecx /* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	testl $0xff000000, %ecx /* is fourth byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	shrq $32, %rcx		/* look at other half.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0xff0000, %ecx	/* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-2:
-	/* Second step: Copy source to destination.  */
-
-	movq	%rsi, %rcx	/* duplicate  */
-	andl	$7,%ecx		/* mask alignment bits */
-	movq	%rax, %rdx	/* move around */
-	jz	22f		/* aligned => start loop */
-
-	neg	%ecx		/* align to 8 bytes.  */
-	addl	$8, %ecx
-	/* Align the source pointer.  */
-21:
-	movb	(%rsi), %al	/* Fetch a byte */
-	testb	%al, %al	/* Is it NUL? */
-	movb	%al, (%rdx)	/* Store it */
-	jz	24f		/* If it was NUL, done! */
-	incq	%rsi
-	incq	%rdx
-	decl	%ecx
-	jnz	21b
-
-	/* Now the sources is aligned.  Unfortunatly we cannot force
-	   to have both source and destination aligned, so ignore the
-	   alignment of the destination.  */
-	.p2align 4
-22:
-	/* 1st unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-
-	/* 2nd unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-
-	/* 3rd unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-
-	/* 4th unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-	jmp	22b		/* Next iteration.  */
-
-	/* Do the last few bytes. %rax contains the value to write.
-	   The loop is unrolled twice.  */
-	.p2align 4
-23:
-	movb	%al, (%rdx)	/* 1st byte.  */
-	testb	%al, %al	/* Is it NUL.  */
-	jz	24f		/* yes, finish.  */
-	incq	%rdx		/* Increment destination.  */
-	movb	%ah, (%rdx)	/* 2nd byte.  */
-	testb	%ah, %ah	/* Is it NUL?.  */
-	jz	24f		/* yes, finish.  */
-	incq	%rdx		/* Increment destination.  */
-	shrq	$16, %rax	/* Shift...  */
-	jmp	23b		/* and look at next two bytes in %rax.  */
-
-
-24:
-	movq	%rdi, %rax	/* Source is return value.  */
-	retq
-END (BP_SYM (strcat))
-libc_hidden_builtin_def (strcat)
-- 
1.7.4.4




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]