This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.14-209-gad69cc2


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  ad69cc2652c0422ebac3296d914c25e470498ce1 (commit)
      from  39dd69dfb9b8948720f05455df6775b70238803f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ad69cc2652c0422ebac3296d914c25e470498ce1

commit ad69cc2652c0422ebac3296d914c25e470498ce1
Author: David S. Miller <davem@davemloft.net>
Date:   Wed Aug 24 01:32:24 2011 -0700

    Rewritten v9/64-bit sparc strcmp.

diff --git a/ChangeLog b/ChangeLog
index 4fde8c2..ab754e5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2011-08-24  David S. Miller  <davem@davemloft.net>
+
+	* sysdeps/sparc/sparc64/strcmp.S: Rewrite.
+
 2011-08-24  Andreas Schwab  <schwab@redhat.com>
 
 	* elf/Makefile: Add rules to build and run unload8 test.
diff --git a/sysdeps/sparc/sparc64/strcmp.S b/sysdeps/sparc/sparc64/strcmp.S
index fade4c4..263bb40 100644
--- a/sysdeps/sparc/sparc64/strcmp.S
+++ b/sysdeps/sparc/sparc64/strcmp.S
@@ -1,9 +1,8 @@
 /* Compare two strings for differences.
    For SPARC v9.
-   Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
-                  Jakub Jelinek <jj@ultra.linux.cz>.
+   Contributed by David S. Miller <davem@davemloft.net>
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -22,259 +21,182 @@
 
 #include <sysdep.h>
 #include <asm/asi.h>
+
 #ifndef XCC
 	.register	%g2, #scratch
 	.register	%g3, #scratch
 	.register	%g6, #scratch
 #endif
 
-	/* Normally, this uses
-	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
-	   to find out if any byte in xword could be zero. This is fast, but
-	   also gives false alarm for any byte in range 0x81-0xff. It does
-	   not matter for correctness, as if this test tells us there could
-	   be some zero byte, we check it byte by byte, but if bytes with
-	   high bits set are common in the strings, then this will give poor
-	   performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
-	   will use one tick slower, but more precise test
-	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
-	   which does not give any false alarms (but if some bits are set,
-	   one cannot assume from it which bytes are zero and which are not).
-	   It is yet to be measured, what is the correct default for glibc
-	   in these days for an average user.
+#define rSTR1		%o0
+#define rSTR2		%o1
+#define r0101		%o2	/* 0x0101010101010101 */
+#define r8080		%o3	/* 0x8080808080808080 */
+#define rSTRXOR		%o4
+#define rWORD1		%o5
+#define rTMP1		%g1
+#define rTMP2		%g2
+#define rWORD2		%g3
+#define rSLL		%g4
+#define rSRL		%g5
+#define rBARREL		%g6
+
+	/* There are two cases, either the two pointers are aligned
+	 * identically or they are not.  If they have the same
+	 * alignment we can use the normal full speed loop.  Otherwise
+	 * we have to use the barrel-shifter version.
 	 */
 
 	.text
-	.align		32
+	.align	32
 ENTRY(strcmp)
-	sethi		%hi(0x01010101), %g1			/* IEU0		Group		*/
-	andcc		%o0, 7, %g0				/* IEU1				*/
-	bne,pn		%icc, 7f				/* CTI				*/
-	 or		%g1, %lo(0x01010101), %g1		/* IEU0		Group		*/
-
-	andcc		%o1, 7, %g3				/* IEU1				*/
-	bne,pn		%icc, 9f				/* CTI				*/
-	 sllx		%g1, 32, %g2				/* IEU0		Group		*/
-	ldx		[%o0], %o2				/* Load				*/
-
-	or		%g1, %g2, %g1				/* IEU0		Group		*/
-1:	ldx		[%o1], %o3				/* Load				*/
-	sub		%o1, %o0, %o1				/* IEU1				*/
-	sllx		%g1, 7, %g2				/* IEU0		Group		*/
-
-2:	add		%o0, 8, %o0				/* IEU1				*/
-	sub		%o2, %g1, %g3				/* IEU0		Group		*/
-	subcc		%o2, %o3, %g0				/* IEU1				*/
-	bne,pn		%xcc, 13f				/* CTI				*/
-
-#ifdef EIGHTBIT_NOT_RARE
-	 andn		%g3, %o2, %g4				/* IEU0		Group		*/
-	ldxa		[%o0] ASI_PNF, %o2			/* Load				*/
-	andcc		%g4, %g2, %g0				/* IEU1		Group		*/
-#else
-	 ldxa		[%o0] ASI_PNF, %o2			/* Load		Group		*/
-	andcc		%g3, %g2, %g0				/* IEU1				*/
-#endif
-	be,a,pt		%xcc, 2b				/* CTI				*/
-	 ldxa		[%o1 + %o0] ASI_PNF, %o3		/* Load		Group		*/
-
-	addcc		%g3, %g1, %o4				/* IEU1				*/
-	srlx		%g3, 32, %g3				/* IEU0				*/
-	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
-	be,pt		%xcc, 3f				/* CTI				*/
-
-	 srlx		%o4, 56, %o5				/* IEU0				*/
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4f				/* CTI				*/
-	 srlx		%o4, 48, %o5				/* IEU0				*/
-
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4f				/* CTI				*/
-	 srlx		%o4, 40, %o5				/* IEU0				*/
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-
-	be,pn		%icc, 4f				/* CTI				*/
-	 srlx		%o4, 32, %o5				/* IEU0				*/
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4f				/* CTI				*/
-
-3:	 srlx		%o4, 24, %o5				/* IEU0				*/
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4f				/* CTI				*/
-	 srlx		%o4, 16, %o5				/* IEU0				*/
-
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4f				/* CTI				*/
-	 srlx		%o4, 8, %o5				/* IEU0				*/
-	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
-
-	be,pn		%icc, 4f				/* CTI				*/
-	 andcc		%o4, 0xff, %g0				/* IEU1		Group		*/
-	bne,a,pn	%icc, 2b				/* CTI				*/
-	 ldxa		[%o1 + %o0] ASI_PNF, %o3		/* Load				*/
-
-4:	retl							/* CTI+IEU1	Group		*/
-	 clr		%o0					/* IEU0				*/
-
-	.align		32
-13:	mov		0xff, %g6				/* IEU0		Group		*/
-#ifdef EIGHTBIT_NOT_RARE
-	andcc		%g4, %g2, %g0				/* IEU1				*/
-#else
-	andcc		%g3, %g2, %g0				/* IEU1				*/
-#endif
-	be,pt		%xcc, 25f				/* CTI				*/
-	 addcc		%g3, %g1, %o4				/* IEU1		Group		*/
-
-	srlx		%g3, 32, %g3				/* IEU0				*/
-	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
-	be,pt		%xcc, 23f				/* CTI				*/
-	 sllx		%g6, 56, %o5				/* IEU0				*/
-
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-	be,pn		%xcc, 24f				/* CTI				*/
-	 sllx		%g6, 48, %o5				/* IEU0				*/
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-
-	be,pn		%xcc, 24f				/* CTI				*/
-	 sllx		%g6, 40, %o5				/* IEU0				*/
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-	be,pn		%xcc, 24f				/* CTI				*/
-
-	 sllx		%g6, 32, %o5				/* IEU0				*/
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-	be,pn		%xcc, 24f				/* CTI				*/
-23:	 sllx		%g6, 24, %o5				/* IEU0				*/
-
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 24f				/* CTI				*/
-	 sllx		%g6, 16, %o5				/* IEU0				*/
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-
-	be,pn		%icc, 24f				/* CTI				*/
-	 sllx		%g6, 8, %o5				/* IEU0				*/
-	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 24f				/* CTI				*/
-
-	 mov		%g6, %o5				/* IEU0				*/
-25:	cmp		%o4, %o3				/* IEU1		Group		*/
-5:	mov		-1, %o0					/* IEU0				*/
-	retl							/* CTI+IEU1	Group		*/
-
-	 movgu		%xcc, 1, %o0				/* Single	Group		*/
-
-	.align		16
-24:	sub		%o5, 1, %g6				/* IEU0		Group		*/
-	clr		%o0					/* IEU1				*/
-	or		%o5, %g6, %o5				/* IEU0		Group		*/
-	andn		%o4, %o5, %o4				/* IEU0		Group		*/
-
-	andn		%o3, %o5, %o3				/* IEU1				*/
-	cmp		%o4, %o3				/* IEU1		Group		*/
-	movgu		%xcc, 1, %o0				/* Single	Group		*/
-	retl							/* CTI+IEU1	Group		*/
-
-	 movlu		%xcc, -1, %o0				/* Single	Group		*/
-6:	retl							/* CTI+IEU1	Group		*/
-	 mov		%o4, %o0				/* IEU0				*/
-
-	.align		16
-7:	ldub		[%o0], %o2				/* Load				*/
-	add		%o0, 1, %o0				/* IEU1				*/
-	ldub		[%o1], %o3				/* Load		Group		*/
-	sllx		%g1, 32, %g2				/* IEU0				*/
-
-8:	add		%o1, 1, %o1				/* IEU1				*/
-	subcc		%o2, %o3, %o4				/* IEU1		Group		*/
-	bne,pn		%xcc, 6b				/* CTI				*/
-	 lduba		[%o0] ASI_PNF, %o2			/* Load				*/
-
-	brz,pn		%o3, 4b					/* CTI+IEU1	Group		*/
-	 lduba		[%o1] ASI_PNF, %o3			/* Load				*/
-	andcc		%o0, 7, %g0				/* IEU1		Group		*/
-	bne,a,pn	%icc, 8b				/* CTI				*/
-
-	 add		%o0, 1, %o0				/* IEU0				*/
-	or		%g1, %g2, %g1				/* IEU0		Group		*/
-	andcc		%o1, 7, %g3				/* IEU1				*/
-	be,a,pn		%icc, 1b				/* CTI				*/
-
-	 ldxa		[%o0] ASI_PNF, %o2			/* Load		Group		*/
-9:	sllx		%g3, 3, %g5				/* IEU0				*/
-	mov		64, %o5					/* IEU1				*/
-	sub		%o1, %g3, %o1				/* IEU0		Group		*/
-
-	sub		%o5, %g5, %o5				/* IEU1				*/
-	ldxa		[%o1] ASI_PNF, %g6			/* Load		Group		*/
-	or		%g1, %g2, %g1				/* IEU0				*/
-	sub		%o1, %o0, %o1				/* IEU1				*/
-
-	sllx		%g1, 7, %g2				/* IEU0		Group		*/
-	add		%o1, 8, %o1				/* IEU1				*/
-								/* %g1 = 0101010101010101
-								 * %g2 = 8080808080800880
-								 * %g5 = number of bits to shift left
-								 * %o5 = number of bits to shift right */
-10:	sllx		%g6, %g5, %o3				/* IEU0		Group		*/
-	ldxa		[%o1 + %o0] ASI_PNF, %g6		/* Load				*/
-
-11:	srlx		%g6, %o5, %o4				/* IEU0		Group		*/
-	ldxa		[%o0] ASI_PNF, %o2			/* Load				*/
-	or		%o3, %o4, %o3				/* IEU1				*/
-	add		%o0, 8, %o0				/* IEU0		Group		*/
-
-	subcc		%o2, %o3, %g0				/* IEU1				*/
-#ifdef EIGHTBIT_NOT_RARE
-	sub		%o2, %g1, %g3				/* IEU0		Group		*/
-	bne,pn		%xcc, 13b				/* CTI				*/
-	 andn		%g3, %o2, %g4				/* IEU0		Group		*/
-
-	andcc		%g4, %g2, %g0				/* IEU1		Group		*/
-	be,pt		%xcc, 10b				/* CTI				*/
-	 srlx		%g4, 32, %g4				/* IEU0				*/
-	andcc		%g4, %g2, %g0				/* IEU1		Group		*/
-#else
-	bne,pn		%xcc, 13b				/* CTI				*/
-	 sub		%o2, %g1, %g3				/* IEU0		Group		*/
-	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
-
-	be,pt		%xcc, 10b				/* CTI				*/
-	 srlx		%g3, 32, %g3				/* IEU0				*/
-	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
-#endif
-	be,pt		%xcc, 12f				/* CTI				*/
-
-	 srlx		%o2, 56, %g3				/* IEU0				*/
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4b				/* CTI				*/
-	 srlx		%o2, 48, %g3				/* IEU0				*/
-
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4b				/* CTI				*/
-	 srlx		%o2, 40, %g3				/* IEU0				*/
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-
-	be,pn		%icc, 4b				/* CTI				*/
-	 srlx		%o2, 32, %g3				/* IEU0				*/
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4b				/* CTI				*/
-
-12:	 srlx		%o2, 24, %g3				/* IEU0				*/
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4b				/* CTI				*/
-	 srlx		%o2, 16, %g3				/* IEU0				*/
-
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4b				/* CTI				*/
-	 srlx		%o2, 8, %g3				/* IEU0				*/
-	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
-
-	be,pn		%icc, 4b				/* CTI				*/
-	 andcc		%o2, 0xff, %g0				/* IEU1		Group		*/
-	be,pn		%icc, 4b				/* CTI				*/
-	 sllx		%g6, %g5, %o3				/* IEU0				*/
-
-	ba,pt		%xcc, 11b				/* CTI		Group		*/
-	 ldxa		[%o1 + %o0] ASI_PNF, %g6		/* Load				*/
+	or	rSTR2, rSTR1, rTMP1
+	sethi	%hi(0x80808080), r8080
+
+	andcc	rTMP1, 0x7, %g0
+	bne,pn	%icc, .Lmaybe_barrel_shift
+	 or	r8080, %lo(0x80808080), r8080
+	ldx	[rSTR1], rWORD1
+
+	sub	rSTR2, rSTR1, rSTR2
+	sllx	r8080, 32, rTMP1
+
+	ldx	[rSTR1 + rSTR2], rWORD2
+	or	r8080, rTMP1, r8080
+
+	ba,pt	%xcc, .Laligned_loop_entry
+	 srlx	r8080, 7, r0101
+
+	.align	32
+.Laligned_loop_entry:
+.Laligned_loop:
+	add	rSTR1, 8, rSTR1
+
+	sub	rWORD1, r0101, rTMP2
+	xorcc	rWORD1, rWORD2, rSTRXOR
+	bne,pn	%xcc, .Lcommon_endstring
+
+	 andn	r8080, rWORD1, rTMP1
+
+	ldxa	[rSTR1] ASI_PNF, rWORD1
+	andcc	rTMP1, rTMP2, %g0
+	be,a,pt	%xcc, .Laligned_loop
+
+	 ldxa	[rSTR1 + rSTR2] ASI_PNF, rWORD2
+
+.Lcommon_equal:
+	retl
+	 mov	0, %o0
+
+	/* All loops terminate here once they find an unequal word.
+	 * If a zero byte appears in the word before the first unequal
+	 * byte, we must report zero.  Otherwise we report '1' or '-1'
+	 * depending upon whether the first mis-matching byte is larger
+	 * in the first string or the second, respectively.
+	 *
+	 * First we compute a 64-bit mask value that has "0x01" in
+	 * each byte where a zero exists in rWORD1.  rSTRXOR holds the
+	 * value (rWORD1 ^ rWORD2).  Therefore, if considered as an
+	 * unsigned quantity, our "0x01" mask value is "greater than"
+	 * rSTRXOR then a zero terminating byte comes first and
+	 * therefore we report '0'.
+	 *
+	 * The formula for this mask is:
+	 *
+	 *    mask_tmp1 = ~rWORD1 & 0x8080808080808080;
+	 *    mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) +
+	 *                 0x7f7f7f7f7f7f7f7f);
+	 *
+	 *    mask = ((mask_tmp1 & ~mask_tmp2) >> 7);
+	 */
+.Lcommon_endstring:
+	andn	rWORD1, r8080, rTMP2
+	or	r8080, 1, %o1
+
+	mov	1, %o0
+	sub	rTMP2, %o1, rTMP2
+
+	cmp	rWORD1, rWORD2
+	andn	rTMP1, rTMP2, rTMP1
+
+	movleu	%xcc, -1, %o0
+	srlx	rTMP1, 7, rTMP1
+
+	cmp	rTMP1, rSTRXOR
+	retl
+	 movgu	%xcc, 0, %o0
+
+.Lmaybe_barrel_shift:
+	sub	rSTR2, rSTR1, rSTR2
+	sllx	r8080, 32, rTMP1
+
+	or	r8080, rTMP1, r8080
+	and	rSTR1, 0x7, rTMP2
+
+	srlx	r8080, 7, r0101
+	andn	rSTR1, 0x7, rSTR1
+
+	ldxa	[rSTR1] ASI_PNF, rWORD1
+	andcc	rSTR2, 0x7, rSLL
+	sll	rTMP2, 3, rSTRXOR
+
+	bne,pn	%icc, .Lneed_barrel_shift
+	 mov	-1, rTMP1
+	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL
+
+	srlx	rTMP1, rSTRXOR, rTMP2
+
+	orn	rWORD1, rTMP2, rWORD1
+	ba,pt	%xcc, .Laligned_loop_entry
+	 orn	rBARREL, rTMP2, rWORD2
+
+.Lneed_barrel_shift:
+	sllx	rSLL, 3, rSLL
+	andn	rSTR2, 0x7, rSTR2
+
+	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL
+	mov	64, rTMP2
+	sub	rTMP2, rSLL, rSRL
+
+	srlx	rTMP1, rSTRXOR, rTMP1
+	add	rSTR2, 8, rSTR2
+
+	orn	rWORD1, rTMP1, rWORD1
+	sllx	rBARREL, rSLL, rWORD2
+	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL
+
+	add	rSTR1, 8, rSTR1
+	sub	rWORD1, r0101, rTMP2
+
+	srlx	rBARREL, rSRL, rSTRXOR
+
+	or	rWORD2, rSTRXOR, rWORD2
+
+	orn	rWORD2, rTMP1, rWORD2
+	ba,pt	%xcc, .Lbarrel_shift_loop_entry
+	 andn	r8080, rWORD1, rTMP1
+
+.Lbarrel_shift_loop:
+	sllx	rBARREL, rSLL, rWORD2
+	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL
+
+	add	rSTR1, 8, rSTR1
+	sub	rWORD1, r0101, rTMP2
+
+	srlx	rBARREL, rSRL, rSTRXOR
+	andn	r8080, rWORD1, rTMP1
+
+	or	rWORD2, rSTRXOR, rWORD2
+
+.Lbarrel_shift_loop_entry:
+	xorcc	rWORD1, rWORD2, rSTRXOR
+	bne,pn	%xcc, .Lcommon_endstring
+
+	 andcc	rTMP1, rTMP2, %g0
+	be,a,pt	%xcc, .Lbarrel_shift_loop
+	 ldxa	[rSTR1] ASI_PNF, rWORD1
+
+	retl
+	 mov	0, %o0
 END(strcmp)
 libc_hidden_builtin_def (strcmp)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                      |    4 +
 sysdeps/sparc/sparc64/strcmp.S |  416 ++++++++++++++++------------------------
 2 files changed, 173 insertions(+), 247 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]