This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.18-331-g69f13db


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  69f13dbf06c6195de0ada8632271d58ca3cf55da (commit)
      from  151659f6371ce39a488fd132a5c8ce5e3bba983c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=69f13dbf06c6195de0ada8632271d58ca3cf55da

commit 69f13dbf06c6195de0ada8632271d58ca3cf55da
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date:   Thu Sep 26 09:29:19 2013 -0500

    PowerPC: strcpy/stpcpy optimization for PPC64/POWER7
    
    This patch intends to unify both strcpy and stpcpy implementationsi
    for PPC64 and PPC64/POWER7. The idead default powerpc64 implementation
    is to provide both doubleword and word aligned memory access.
    
    For PPC64/POWER7 is also provide doubleword and word memory access,
    remove the branch hints, use the cmpb instruction for compare
    doubleword/words, and add an optimization for inputs of same alignment.

diff --git a/ChangeLog b/ChangeLog
index c2e5261..23f6b70 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2013-10-04  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/strcpy.S (strcpy): Add word load/store
+	to provide a boost for large inputs with word alignment.
+	* sysdeps/powerpc/powerpc64/stpcpy.S (__stpcpy): Rewrite
+	implementation based on optimized PPC64 strcpy.
+	* sysdeps/powerpc/powerpc64/power7/strcpy.S: New file: optimized
+	strcpy for PPC64/POWER7 based on both doubleword and word load/store.
+	* sysdeps/powerpc/powerpc64/power7/stpcpy.S: New file: optimized
+	stpcpy for PPC64/POWER7 based on PPC64/POWER7 strcpy.
+
 2013-10-25   OndÅ?ej Bílka  <neleai@seznam.cz>
 
 	[BZ 2801]
diff --git a/sysdeps/powerpc/powerpc64/power7/stpcpy.S b/sysdeps/powerpc/powerpc64/power7/stpcpy.S
new file mode 100644
index 0000000..727dd06
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/stpcpy.S
@@ -0,0 +1,24 @@
+/* Optimized stpcpy implementation for PowerPC64/POWER7.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define USE_AS_STPCPY
+#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
+
+weak_alias (__stpcpy, stpcpy)
+libc_hidden_def (__stpcpy)
+libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S
new file mode 100644
index 0000000..5c341a1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/strcpy.S
@@ -0,0 +1,274 @@
+/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Implements the function
+
+   char * [r3] strcpy (char *dest [r3], const char *src [r4])
+
+   or
+
+   char * [r3] strcpy (char *dest [r3], const char *src [r4])
+
+   if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
+   when possible using the following algorithm:
+
+   if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
+     goto aligned_doubleword_copy;
+   if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0))
+     goto aligned_word_copy;
+   if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
+     goto same_alignment;
+   goto unaligned;
+
+   The aligned comparison are made using cmpb instructions.  */
+
+#ifdef USE_AS_STPCPY
+# define FUNC_NAME __stpcpy
+#else
+# define FUNC_NAME strcpy
+#endif
+
+	.machine  power7
+EALIGN (FUNC_NAME, 4, 0)
+	CALL_MCOUNT 2
+
+#define rTMP	r0
+#ifdef USE_AS_STPCPY
+#define rRTN	r3	/* pointer to previous word/doubleword in dest */
+#else
+#define rRTN	r12	/* pointer to previous word/doubleword in dest */
+#endif
+#define rSRC	r4	/* pointer to previous word/doubleword in src */
+#define rMASK	r5	/* mask 0xffffffff | 0xffffffffffffffff */
+#define rWORD	r6	/* current word from src */
+#define rALT	r7	/* alternate word from src */
+#define rRTNAL	r8	/* alignment of return pointer */
+#define rSRCAL	r9	/* alignment of source pointer */
+#define rALCNT	r10	/* bytes to read to reach 8 bytes alignment */
+#define rSUBAL	r11	/* doubleword minus unaligned displacement */
+
+#ifndef USE_AS_STPCPY
+/* Save the dst pointer to use as return value.  */
+	mr	rRTN, r3
+#endif
+	or	rTMP, rSRC, rRTN
+	clrldi.	rTMP, rTMP, 61
+	bne	L(check_word_alignment)
+	b	L(aligned_doubleword_copy)
+
+L(same_alignment):
+/* Src and dst with same alignment: align both to doubleword.  */
+	mr	rALCNT, rRTN
+	lbz	rWORD, 0(rSRC)
+	subfic	rSUBAL, rRTNAL, 8
+	addi	rRTN, rRTN, 1
+	addi	rSRC, rSRC, 1
+	cmpdi	cr7, rWORD, 0
+	stb	rWORD, 0(rALCNT)
+	beq	cr7, L(s2)
+
+	add	rALCNT, rALCNT, rSUBAL
+	subf	rALCNT, rRTN, rALCNT
+	addi	rALCNT, rALCNT, 1
+	mtctr	rALCNT
+	b	L(s1)
+
+	.align 4
+L(s0):
+	addi	rSRC, rSRC, 1
+	lbz	rWORD, -1(rSRC)
+	cmpdi	cr7, rWORD, 0
+	stb	rWORD, -1(rALCNT)
+	beqlr	cr7
+	mr	rRTN, rALCNT
+L(s1):
+	addi	rALCNT, rRTN,1
+	bdnz	L(s0)
+	b L(aligned_doubleword_copy)
+	.align 4
+L(s2):
+	mr	rRTN, rALCNT
+	blr
+
+/* For doubleword aligned memory, operate using doubleword load and stores.  */
+	.align 4
+L(aligned_doubleword_copy):
+	li	rMASK, 0
+	addi	rRTN, rRTN, -8
+	ld	rWORD, 0(rSRC)
+	b	L(g2)
+
+	.align 4
+L(g0):	ldu	rALT, 8(rSRC)
+	stdu	rWORD, 8(rRTN)
+	cmpb	rTMP, rALT, rMASK
+	cmpdi	rTMP, 0
+	bne	L(g1)
+	ldu	rWORD, 8(rSRC)
+	stdu	rALT, 8(rRTN)
+L(g2):	cmpb	rTMP, rWORD, rMASK
+	cmpdi	rTMP, 0		/* If rTMP is 0, no null's have been found.  */
+	beq	L(g0)
+
+	mr	rALT, rWORD
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+	extrdi.	rTMP, rALT, 8, 56
+	stbu	rALT, 8(rRTN)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 48
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 40
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 32
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 24
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 16
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	extrdi.	rTMP, rALT, 8, 8
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	extrdi	rTMP, rALT, 8, 0
+	stbu	rTMP, 1(rRTN)
+#else
+	extrdi.	rTMP, rALT, 8, 0
+	stbu	rTMP, 8(rRTN)
+	beqlr
+	extrdi.	rTMP, rALT, 8, 8
+	stbu	rTMP, 1(rRTN)
+	beqlr
+	extrdi.	rTMP, rALT, 8, 16
+	stbu	rTMP, 1(rRTN)
+	beqlr
+	extrdi.	rTMP, rALT, 8, 24
+	stbu	rTMP, 1(rRTN)
+	beqlr
+	extrdi.	rTMP, rALT, 8, 32
+	stbu	rTMP, 1(rRTN)
+	beqlr
+	extrdi.	rTMP, rALT, 8, 40
+	stbu	rTMP, 1(rRTN)
+	beqlr
+	extrdi.	rTMP, rALT, 8, 48
+	stbu	rTMP, 1(rRTN)
+	beqlr
+	stbu	rALT, 1(rRTN)
+#endif
+	blr
+
+L(check_word_alignment):
+	clrldi. rTMP, rTMP, 62
+	beq	L(aligned_word_copy)
+	rldicl	rRTNAL, rRTN, 0, 61
+	rldicl	rSRCAL, rSRC, 0, 61
+	cmpld	cr7, rSRCAL, rRTNAL
+	beq	cr7, L(same_alignment)
+	b	L(unaligned)
+
+/* For word aligned memory, operate using word load and stores.  */
+	.align	4
+L(aligned_word_copy):
+	li	rMASK, 0
+	addi	rRTN, rRTN, -4
+	lwz	rWORD, 0(rSRC)
+	b	L(g5)
+
+	.align	4
+L(g3):	lwzu	rALT, 4(rSRC)
+	stwu	rWORD, 4(rRTN)
+	cmpb	rTMP, rALT, rMASK
+	cmpwi	rTMP, 0
+	bne	L(g4)
+	lwzu	rWORD, 4(rSRC)
+	stwu	rALT, 4(rRTN)
+L(g5):	cmpb	rTMP, rWORD, rMASK
+	cmpwi	rTMP, 0		/* If rTMP is 0, no null in word.  */
+	beq	L(g3)
+
+	mr      rALT, rWORD
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
+L(g4):
+#ifdef __LITTLE_ENDIAN__
+	rlwinm.	rTMP, rALT, 0, 24, 31
+	stbu	rALT, 4(rRTN)
+	beqlr-
+	rlwinm.	rTMP, rALT, 24, 24, 31
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	rlwinm.	rTMP, rALT, 16, 24, 31
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	rlwinm	rTMP, rALT, 8, 24, 31
+	stbu	rTMP, 1(rRTN)
+#else
+	rlwinm. rTMP, rALT, 8, 24, 31
+	stbu    rTMP, 4(rRTN)
+	beqlr
+	rlwinm. rTMP, rALT, 16, 24, 31
+	stbu    rTMP, 1(rRTN)
+	beqlr
+	rlwinm. rTMP, rALT, 24, 24, 31
+	stbu    rTMP, 1(rRTN)
+	beqlr
+	stbu    rALT, 1(rRTN)
+#endif
+	blr
+
+/* Oh well.  In this case, we just do a byte-by-byte copy.  */
+	.align	4
+L(unaligned):
+	lbz	rWORD, 0(rSRC)
+	addi	rRTN, rRTN, -1
+	cmpdi	rWORD, 0
+	beq	L(u2)
+
+	.align 	5
+L(u0):	lbzu	rALT, 1(rSRC)
+	stbu	rWORD, 1(rRTN)
+	cmpdi	rALT, 0
+	beq	L(u1)
+	lbzu	rWORD, 1(rSRC)
+	stbu	rALT, 1(rRTN)
+	cmpdi	rWORD, 0
+	beq	L(u2)
+	lbzu	rALT, 1(rSRC)
+	stbu	rWORD, 1(rRTN)
+	cmpdi	rALT, 0
+	beq	L(u1)
+	lbzu	rWORD, 1(rSRC)
+	stbu	rALT, 1(rRTN)
+	cmpdi	rWORD, 0
+	bne	L(u0)
+L(u2):	stbu	rWORD, 1(rRTN)
+	blr
+L(u1):	stbu	rALT, 1(rRTN)
+	blr
+END (FUNC_NAME)
+
+#ifndef USE_AS_STPCPY
+libc_hidden_builtin_def (strcpy)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S
index c0b3972..09aa3be 100644
--- a/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -16,103 +16,8 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-
-/* See strlen.s for comments on how the end-of-string testing works.  */
-
-/* char * [r3] stpcpy (char *dest [r3], const char *src [r4])  */
-
-EALIGN (__stpcpy, 4, 0)
-	CALL_MCOUNT 2
-
-#define rTMP	r0
-#define rRTN	r3
-#define rDEST	r3		/* pointer to previous word in dest */
-#define rSRC	r4		/* pointer to previous word in src */
-#define rWORD	r6		/* current word from src */
-#define rFEFE	r7		/* 0xfefefeff */
-#define r7F7F	r8		/* 0x7f7f7f7f */
-#define rNEG	r9		/* ~(word in src | 0x7f7f7f7f) */
-#define rALT	r10		/* alternate word from src */
-
-	or	rTMP, rSRC, rDEST
-	clrldi.	rTMP, rTMP, 62
-	addi	rDEST, rDEST, -4
-	bne	L(unaligned)
-
-	lis	rFEFE, -0x101
-	lis	r7F7F, 0x7f7f
-	lwz	rWORD, 0(rSRC)
-	addi	rFEFE, rFEFE, -0x101
-	addi	r7F7F, r7F7F, 0x7f7f
-	b	L(g2)
-
-L(g0):	lwzu	rALT, 4(rSRC)
-	stwu	rWORD, 4(rDEST)
-	add	rTMP, rFEFE, rALT
-	nor	rNEG, r7F7F, rALT
-	and.	rTMP, rTMP, rNEG
-	bne-	L(g1)
-	lwzu	rWORD, 4(rSRC)
-	stwu	rALT, 4(rDEST)
-L(g2):	add	rTMP, rFEFE, rWORD
-	nor	rNEG, r7F7F, rWORD
-	and.	rTMP, rTMP, rNEG
-	beq+	L(g0)
-
-	mr	rALT, rWORD
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g1):
-#ifdef __LITTLE_ENDIAN__
-	rlwinm.	rTMP, rALT, 0, 24, 31
-	stbu	rALT, 4(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	rlwinm	rTMP, rALT, 8, 24, 31
-	stbu	rTMP, 1(rDEST)
-	blr
-#else
-	rlwinm.	rTMP, rALT, 8, 24, 31
-	stbu	rTMP, 4(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stbu	rTMP, 1(rDEST)
-	beqlr-
-	stbu	rALT, 1(rDEST)
-	blr
-#endif
-
-/* Oh well.  In this case, we just do a byte-by-byte copy.  */
-	.align 4
-	nop
-L(unaligned):
-	lbz	rWORD, 0(rSRC)
-	addi	rDEST, rDEST, 3
-	cmpwi	rWORD, 0
-	beq-	L(u2)
-
-L(u0):	lbzu	rALT, 1(rSRC)
-	stbu	rWORD, 1(rDEST)
-	cmpwi	rALT, 0
-	beq-	L(u1)
-	nop		/* Let 601 load start of loop.  */
-	lbzu	rWORD, 1(rSRC)
-	stbu	rALT, 1(rDEST)
-	cmpwi	rWORD, 0
-	bne+	L(u0)
-L(u2):	stbu	rWORD, 1(rDEST)
-	blr
-L(u1):	stbu	rALT, 1(rDEST)
-	blr
-END (__stpcpy)
+#define USE_AS_STPCPY
+#include <sysdeps/powerpc/powerpc64/strcpy.S>
 
 weak_alias (__stpcpy, stpcpy)
 libc_hidden_def (__stpcpy)
diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S
index a7fd85b..793325d 100644
--- a/sysdeps/powerpc/powerpc64/strcpy.S
+++ b/sysdeps/powerpc/powerpc64/strcpy.S
@@ -22,25 +22,38 @@
 
 /* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
 
-EALIGN (strcpy, 4, 0)
+#ifdef USE_AS_STPCPY
+# define FUNC_NAME __stpcpy
+#else
+# define FUNC_NAME strcpy
+#endif
+
+EALIGN (FUNC_NAME, 4, 0)
 	CALL_MCOUNT 2
 
 #define rTMP	r0
-#define rRTN	r3	/* incoming DEST arg preserved as result */
-#define rSRC	r4	/* pointer to previous word in src */
-#define rDEST	r5	/* pointer to previous word in dest */
+#ifdef USE_AS_STPCPY
+#define rRTN    r3      /* pointer to previous word/doubleword in dest */
+#else
+#define rRTN    r12     /* pointer to previous word/doubleword in dest */
+#endif
+#define rSRC	r4	/* pointer to previous word/doubleword in src */
 #define rWORD	r6	/* current word from src */
-#define rFEFE	r7	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
-#define r7F7F	r8	/* constant 0x7f7f7f7f7f7f7f7f */
-#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
+#define rFEFE	r7	/* constant 0xfefefeff | 0xfefefefefefefeff */
+#define r7F7F	r8	/* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */
+#define rNEG	r9	/* ~(word in s1 | r7F7F) */
 #define rALT	r10	/* alternate word from src */
 
-	dcbt	0,rSRC
+#ifndef USE_AS_STPCPY
+/* Save the dst pointer to use as return value.  */
+	mr      rRTN, r3
+#endif
 	or	rTMP, rSRC, rRTN
 	clrldi.	rTMP, rTMP, 61
-	addi	rDEST, rRTN, -8
-	dcbtst	0,rRTN
-	bne	L(unaligned)
+	bne	L(check_word_alignment)
+
+/* For doubleword aligned memory, operate using doubleword load and stores.  */
+	addi	rRTN, rRTN, -8
 
 	lis	rFEFE, -0x101
 	lis	r7F7F, 0x7f7f
@@ -53,13 +66,13 @@ EALIGN (strcpy, 4, 0)
 	b	L(g2)
 
 L(g0):	ldu	rALT, 8(rSRC)
-	stdu	rWORD, 8(rDEST)
+	stdu	rWORD, 8(rRTN)
 	add	rTMP, rFEFE, rALT
 	nor	rNEG, r7F7F, rALT
 	and.	rTMP, rTMP, rNEG
 	bne-	L(g1)
 	ldu	rWORD, 8(rSRC)
-	stdu	rALT, 8(rDEST)
+	stdu	rALT, 8(rRTN)
 L(g2):	add	rTMP, rFEFE, rWORD
 	nor	rNEG, r7F7F, rWORD
 	and.	rTMP, rTMP, rNEG
@@ -70,77 +83,134 @@ L(g2):	add	rTMP, rFEFE, rWORD
 L(g1):
 #ifdef __LITTLE_ENDIAN__
 	extrdi.	rTMP, rALT, 8, 56
-	stb	rALT, 8(rDEST)
+	stbu	rALT, 8(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 48
-	stb	rTMP, 9(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 40
-	stb	rTMP, 10(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 32
-	stb	rTMP, 11(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 24
-	stb	rTMP, 12(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 16
-	stb	rTMP, 13(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 8
-	stb	rTMP, 14(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi	rTMP, rALT, 8, 0
-	stb	rTMP, 15(rDEST)
-	blr
+	stbu	rTMP, 1(rRTN)
 #else
 	extrdi.	rTMP, rALT, 8, 0
-	stb	rTMP, 8(rDEST)
+	stbu	rTMP, 8(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 8
-	stb	rTMP, 9(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 16
-	stb	rTMP, 10(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 24
-	stb	rTMP, 11(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 32
-	stb	rTMP, 12(rDEST)
-	beqlr-
+	stbu	rTMP, 1(rRTN)
+	beqlr
 	extrdi.	rTMP, rALT, 8, 40
-	stb	rTMP, 13(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
 	extrdi.	rTMP, rALT, 8, 48
-	stb	rTMP, 14(rDEST)
+	stbu	rTMP, 1(rRTN)
 	beqlr-
-	stb	rALT, 15(rDEST)
+	stbu	rALT, 1(rRTN)
+#endif
 	blr
+
+L(check_word_alignment):
+	clrldi. rTMP, rTMP, 62
+	bne     L(unaligned)
+
+/* For word aligned memory, operate using word load and stores.  */
+	addi	rRTN, rRTN, -4
+
+	lis	rFEFE, -0x101
+	lis	r7F7F, 0x7f7f
+	lwz	rWORD, 0(rSRC)
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	b	L(g5)
+
+L(g3):	lwzu	rALT, 4(rSRC)
+	stwu	rWORD, 4(rRTN)
+	add	rTMP, rFEFE, rALT
+	nor	rNEG, r7F7F, rALT
+	and.	rTMP, rTMP, rNEG
+	bne-	L(g4)
+	lwzu	rWORD, 4(rSRC)
+	stwu	rALT, 4(rRTN)
+L(g5):	add	rTMP, rFEFE, rWORD
+	nor	rNEG, r7F7F, rWORD
+	and.	rTMP, rTMP, rNEG
+	beq+	L(g3)
+
+	mr	rALT, rWORD
+/* We've hit the end of the string.  Do the rest byte-by-byte.  */
+L(g4):
+#ifdef __LITTLE_ENDIAN__
+	rlwinm.	rTMP, rALT, 0, 24, 31
+	stbu	rALT, 4(rRTN)
+	beqlr-
+	rlwinm.	rTMP, rALT, 24, 24, 31
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	rlwinm.	rTMP, rALT, 16, 24, 31
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	rlwinm	rTMP, rALT, 8, 24, 31
+	stbu	rTMP, 1(rRTN)
+#else
+	rlwinm.	rTMP, rALT, 8, 24, 31
+	stbu	rTMP, 4(rRTN)
+	beqlr-
+	rlwinm.	rTMP, rALT, 16, 24, 31
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	rlwinm.	rTMP, rALT, 24, 24, 31
+	stbu	rTMP, 1(rRTN)
+	beqlr-
+	stbu	rALT, 1(rRTN)
 #endif
+	blr
 
 /* Oh well.  In this case, we just do a byte-by-byte copy.  */
 	.align 4
 	nop
 L(unaligned):
 	lbz	rWORD, 0(rSRC)
-	addi	rDEST, rRTN, -1
+	addi	rRTN, rRTN, -1
 	cmpwi	rWORD, 0
 	beq-	L(u2)
 
 L(u0):	lbzu	rALT, 1(rSRC)
-	stbu	rWORD, 1(rDEST)
+	stbu	rWORD, 1(rRTN)
 	cmpwi	rALT, 0
 	beq-	L(u1)
 	nop		/* Let 601 load start of loop.  */
 	lbzu	rWORD, 1(rSRC)
-	stbu	rALT, 1(rDEST)
+	stbu	rALT, 1(rRTN)
 	cmpwi	rWORD, 0
 	bne+	L(u0)
-L(u2):	stb	rWORD, 1(rDEST)
+L(u2):	stbu	rWORD, 1(rRTN)
 	blr
-L(u1):	stb	rALT, 1(rDEST)
+L(u1):	stbu	rALT, 1(rRTN)
 	blr
+END (FUNC_NAME)
 
-END (strcpy)
+#ifndef USE_AS_STPCPY
 libc_hidden_builtin_def (strcpy)
+#endif

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |   11 +
 .../powerpc/powerpc64/power7/stpcpy.S              |   19 +-
 sysdeps/powerpc/powerpc64/power7/strcpy.S          |  274 ++++++++++++++++++++
 sysdeps/powerpc/powerpc64/stpcpy.S                 |   99 +-------
 sysdeps/powerpc/powerpc64/strcpy.S                 |  144 ++++++++---
 5 files changed, 400 insertions(+), 147 deletions(-)
 copy dlfcn/modstatic3.c => sysdeps/powerpc/powerpc64/power7/stpcpy.S (77%)
 create mode 100644 sysdeps/powerpc/powerpc64/power7/strcpy.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]