This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] powerpc: power7 strncmp optimization


Hi, 
  The following code change provides a throughput boost of approximately
15% to the 64-bit power7 strncmp code.  The 32-bit throughput is not
notably affected by this change, so the change to the 32-bit code is made
to keep the two files in sync with each other.

2011-08-04  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* sysdeps/powerpc/powerpc32/power7/strncmp.S: Adjust the alignment
	and add nop instructions for throughput optimization.
	* sysdeps/powerpc/powerpc64/power7/strncmp.S: Adjust the alignment
	and add nop instructions for throughput optimization.

diff --git a/sysdeps/powerpc/powerpc32/power7/strncmp.S b/sysdeps/powerpc/powerpc32/power7/strncmp.S
index 6a1422a..bcd13b6 100644
--- a/sysdeps/powerpc/powerpc32/power7/strncmp.S
+++ b/sysdeps/powerpc/powerpc32/power7/strncmp.S
@@ -27,7 +27,7 @@
 		     const char *s2 [r4],
 		     size_t size [r5])  */
 
-EALIGN (BP_SYM(strncmp),4,0)
+EALIGN (BP_SYM(strncmp),5,0)
 
 #define rTMP	r0
 #define rRTN	r3
@@ -47,9 +47,11 @@ EALIGN (BP_SYM(strncmp),4,0)
 #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
 
 	dcbt	0,rSTR1
+	nop
 	or	rTMP,rSTR2,rSTR1
 	lis	r7F7F,0x7f7f
 	dcbt	0,rSTR2
+	nop
 	clrlwi.	rTMP,rTMP,30
 	cmplwi	cr1,rN,0
 	lis	rFEFE,-0x101
diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S
index 498b19c..4c02b11 100644
--- a/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S
@@ -27,7 +27,7 @@
 		     const char *s2 [r4],
 		     size_t size [r5])  */
 
-EALIGN (BP_SYM(strncmp),4,0)
+EALIGN (BP_SYM(strncmp),5,0)
 	CALL_MCOUNT 3
 
 #define rTMP	r0
@@ -48,9 +48,11 @@ EALIGN (BP_SYM(strncmp),4,0)
 #define rBITDIF	r11	/* bits that differ in s1 & s2 words */
 
 	dcbt	0,rSTR1
+	nop
 	or	rTMP,rSTR2,rSTR1
 	lis	r7F7F,0x7f7f
 	dcbt	0,rSTR2
+	nop
 	clrldi.	rTMP,rTMP,61
 	cmpldi	cr1,rN,0
 	lis	rFEFE,-0x101



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]