This is the mail archive of the libc-hacker@sourceware.cygnus.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

PATCH: symbolic regnames for sysdeps/powerpc/{str,mem}*.S


These changes are purely cosmetic, as verified by diff'ing the
preprocessed files before & after, and for paranoia's sake, diff'ing
the disassembled object files before & after.

Here's what I did:

* defined & used symbolic register names
* converted block comments about register usage
  to comments attached to the regname #defines.
* converted '#' comments to C comments.
* aligned operands column with tabs.

These changes pave the way for re-shuffling the register assignments,
as bounded pointers will require, in a way that has minimal impact on
the assembler code.  Most BP changes will then involve only the
register-name #defines.

Greg

Index: memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/memset.S,v
retrieving revision 1.4
diff -u -p -r1.4 memset.S
--- memset.S	2000/02/28 23:13:15	1.4
+++ memset.S	2000/06/06 21:57:54
@@ -19,181 +19,192 @@
 
 #include <sysdep.h>
 
-EALIGN(memset,5,1)
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
 
    The memset is done in three sizes: byte (8 bits), word (32 bits),
    cache line (256 bits). There is a special case for setting cache lines
-   to 0, to take advantage of the dcbz instruction.
-   r6:	current address we are storing at
-   r7:	number of bytes we are setting now (when aligning)  */
+   to 0, to take advantage of the dcbz instruction.  */
 
+EALIGN (memset, 5, 1)
+
+#define rTMP	r0
+#define rRTN	r3	/* initial value of 1st argument */
+#define rCHR	r4	/* char to set in each byte */
+#define rLEN	r5	/* length of region to set */
+#define rMEMP	r6	/* address at which we are storing */
+#define rALIGN	r7	/* number of bytes we are setting now (when aligning) */
+#define rMEMP2	r8
+
+#define rPOS32	r7	/* constant +32 for clearing with dcbz */
+#define rNEG64	r8	/* constant -64 for clearing with dcbz */
+#define rNEG32	r9	/* constant -32 for clearing with dcbz */
+
 /* take care of case for size <= 4  */
-	cmplwi cr1,r5,4
-	andi.  r7,r3,3
-	mr     r6,r3
-	ble-   cr1,L(small)
+	cmplwi	cr1, rLEN, 4
+	andi.	rALIGN, rRTN, 3
+	mr	rMEMP, rRTN
+	ble-	cr1, L(small)
 /* align to word boundary  */
-	cmplwi cr5,r5,31
-	rlwimi r4,r4,8,16,23
-	beq+   L(aligned)		# 8th instruction from .align
-	mtcrf  0x01,r3
-	subfic r7,r7,4
-	add    r6,r6,r7
-	sub    r5,r5,r7
-	bf+    31,L(g0)
-	stb    r4,0(r3)
-	bt     30,L(aligned)
-L(g0):	sth    r4,-2(r6)		#  16th instruction from .align
+	cmplwi	cr5, rLEN, 31
+	rlwimi	rCHR, rCHR, 8, 16, 23
+	beq+	L(aligned)	/* 8th instruction from .align */
+	mtcrf	0x01, rRTN
+	subfic	rALIGN, rALIGN, 4
+	add	rMEMP, rMEMP, rALIGN
+	sub	rLEN, rLEN, rALIGN
+	bf+	31, L(g0)
+	stb	rCHR, 0(rRTN)
+	bt	30, L(aligned)
+L(g0):	sth	rCHR, -2(rMEMP)	/* 16th instruction from .align */
 /* take care of case for size < 31 */
 L(aligned):
-	mtcrf  0x01,r5
-	rlwimi r4,r4,16,0,15
-	ble    cr5,L(medium)
+	mtcrf	0x01, rLEN
+	rlwimi	rCHR, rCHR, 16, 0, 15
+	ble	cr5, L(medium)
 /* align to cache line boundary...  */
-	andi.  r7,r6,0x1C
-	subfic r7,r7,0x20
-	beq    L(caligned)
-	mtcrf  0x01,r7
-	add    r6,r6,r7
-	sub    r5,r5,r7
-	cmplwi cr1,r7,0x10
-	mr     r8,r6
-	bf     28,L(a1)
-	stw    r4,-4(r8)
-	stwu   r4,-8(r8)
-L(a1):	blt    cr1,L(a2)
-	stw    r4,-4(r8)	# 32nd instruction from .align
-	stw    r4,-8(r8)
-	stw    r4,-12(r8)
-	stwu   r4,-16(r8)
-L(a2):	bf     29,L(caligned)
-	stw    r4,-4(r8)
+	andi.	rALIGN, rMEMP, 0x1C
+	subfic	rALIGN, rALIGN, 0x20
+	beq	L(caligned)
+	mtcrf	0x01, rALIGN
+	add	rMEMP, rMEMP, rALIGN
+	sub	rLEN, rLEN, rALIGN
+	cmplwi	cr1, rALIGN, 0x10
+	mr	rMEMP2, rMEMP
+	bf	28, L(a1)
+	stw	rCHR, -4(rMEMP2)
+	stwu	rCHR, -8(rMEMP2)
+L(a1):	blt	cr1, L(a2)
+	stw	rCHR, -4(rMEMP2) /* 32nd instruction from .align */
+	stw	rCHR, -8(rMEMP2)
+	stw	rCHR, -12(rMEMP2)
+	stwu	rCHR, -16(rMEMP2)
+L(a2):	bf	29, L(caligned)
+	stw	rCHR, -4(rMEMP2)
 /* now aligned to a cache line.  */
 L(caligned):
-	cmplwi cr1,r4,0
-	clrrwi. r7,r5,5
-	mtcrf  0x01,r5		# 40th instruction from .align
-	beq    cr1,L(zloopstart) # special case for clearing memory using dcbz
-	srwi   r0,r7,5
-	mtctr  r0
-	beq    L(medium)	# we may not actually get to do a full line
-	clrlwi. r5,r5,27
-	add    r6,r6,r7
-	li     r8,-0x40
-	bdz    L(cloopdone)	# 48th instruction from .align
-
-L(c3):	dcbz   r8,r6
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
-	stw    r4,-12(r6)
-	stw    r4,-16(r6)
-	nop			# let 601 fetch last 4 instructions of loop
-	stw    r4,-20(r6)
-	stw    r4,-24(r6)	# 56th instruction from .align
-	nop			# let 601 fetch first 8 instructions of loop
-	stw    r4,-28(r6)
-	stwu   r4,-32(r6)
-	bdnz   L(c3)
+	cmplwi	cr1, rCHR, 0
+	clrrwi.	rALIGN, rLEN, 5
+	mtcrf	0x01, rLEN	/* 40th instruction from .align */
+	beq	cr1, L(zloopstart) /* special case for clearing memory using dcbz */
+	srwi	rTMP, rALIGN, 5
+	mtctr	rTMP
+	beq	L(medium)	/* we may not actually get to do a full line */
+	clrlwi.	rLEN, rLEN, 27
+	add	rMEMP, rMEMP, rALIGN
+	li	rNEG64, -0x40
+	bdz	L(cloopdone)	/* 48th instruction from .align */
+
+L(c3):	dcbz	rNEG64, rMEMP
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
+	stw	rCHR, -12(rMEMP)
+	stw	rCHR, -16(rMEMP)
+	nop			/* let 601 fetch last 4 instructions of loop */
+	stw	rCHR, -20(rMEMP)
+	stw	rCHR, -24(rMEMP) /* 56th instruction from .align */
+	nop			/* let 601 fetch first 8 instructions of loop */
+	stw	rCHR, -28(rMEMP)
+	stwu	rCHR, -32(rMEMP)
+	bdnz	L(c3)
 L(cloopdone):
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
-	stw    r4,-12(r6)
-	stw    r4,-16(r6)	# 64th instruction from .align
-	stw    r4,-20(r6)
-	cmplwi cr1,r5,16
-	stw    r4,-24(r6)
-	stw    r4,-28(r6)
-	stwu   r4,-32(r6)
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
+	stw	rCHR, -12(rMEMP)
+	stw	rCHR, -16(rMEMP) /* 64th instruction from .align */
+	stw	rCHR, -20(rMEMP)
+	cmplwi	cr1, rLEN, 16
+	stw	rCHR, -24(rMEMP)
+	stw	rCHR, -28(rMEMP)
+	stwu	rCHR, -32(rMEMP)
 	beqlr
-	add    r6,r6,r7
-	b      L(medium_tail2)	# 72nd instruction from .align
+	add	rMEMP, rMEMP, rALIGN
+	b	L(medium_tail2)	/* 72nd instruction from .align */
 
 	.align 5
 	nop
 /* Clear lines of memory in 128-byte chunks.  */
 L(zloopstart):
-	clrlwi r5,r5,27
-	mtcrf  0x02,r7
-	srwi.  r0,r7,7
-	mtctr  r0
-	li     r7,0x20
-	li     r8,-0x40
-	cmplwi cr1,r5,16	# 8
-	bf     26,L(z0)
-	dcbz   0,r6
-	addi   r6,r6,0x20
-L(z0):	li     r9,-0x20
-	bf     25,L(z1)
-	dcbz   0,r6
-	dcbz   r7,r6
-	addi   r6,r6,0x40	# 16
-L(z1):	cmplwi cr5,r5,0
-	beq    L(medium)
+	clrlwi	rLEN, rLEN, 27
+	mtcrf	0x02, rALIGN
+	srwi.	rTMP, rALIGN, 7
+	mtctr	rTMP
+	li	rPOS32, 0x20
+	li	rNEG64, -0x40
+	cmplwi	cr1, rLEN, 16	/* 8 */
+	bf	26, L(z0)
+	dcbz	0, rMEMP
+	addi	rMEMP, rMEMP, 0x20
+L(z0):	li	rNEG32, -0x20
+	bf	25, L(z1)
+	dcbz	0, rMEMP
+	dcbz	rPOS32, rMEMP
+	addi	rMEMP, rMEMP, 0x40 /* 16 */
+L(z1):	cmplwi	cr5, rLEN, 0
+	beq	L(medium)
 L(zloop):
-	dcbz   0,r6
-	dcbz   r7,r6
-	addi   r6,r6,0x80
-	dcbz   r8,r6
-	dcbz   r9,r6
-	bdnz   L(zloop)
-	beqlr  cr5
-	b      L(medium_tail2)
+	dcbz	0, rMEMP
+	dcbz	rPOS32, rMEMP
+	addi	rMEMP, rMEMP, 0x80
+	dcbz	rNEG64, rMEMP
+	dcbz	rNEG32, rMEMP
+	bdnz	L(zloop)
+	beqlr	cr5
+	b	L(medium_tail2)
 
 	.align 5
 L(small):
 /* Memset of 4 bytes or less.  */
-	cmplwi cr5,r5,1
-	cmplwi cr1,r5,3
-	bltlr  cr5
-	stb    r4,0(r6)
-	beqlr  cr5
+	cmplwi	cr5, rLEN, 1
+	cmplwi	cr1, rLEN, 3
+	bltlr	cr5
+	stb	rCHR, 0(rMEMP)
+	beqlr	cr5
 	nop
-	stb    r4,1(r6)
-	bltlr  cr1
-	stb    r4,2(r6)
-	beqlr  cr1
+	stb	rCHR, 1(rMEMP)
+	bltlr	cr1
+	stb	rCHR, 2(rMEMP)
+	beqlr	cr1
 	nop
-	stb    r4,3(r6)
+	stb	rCHR, 3(rMEMP)
 	blr
 
 /* Memset of 0-31 bytes.  */
 	.align 5
 L(medium):
-	cmplwi cr1,r5,16
+	cmplwi	cr1, rLEN, 16
 L(medium_tail2):
-	add    r6,r6,r5
+	add	rMEMP, rMEMP, rLEN
 L(medium_tail):
-	bt-    31,L(medium_31t)
-	bt-    30,L(medium_30t)
+	bt-	31, L(medium_31t)
+	bt-	30, L(medium_30t)
 L(medium_30f):
-	bt-    29,L(medium_29t)
+	bt-	29, L(medium_29t)
 L(medium_29f):
-	bge-   cr1,L(medium_27t)
-	bflr-  28
-	stw    r4,-4(r6)		# 8th instruction from .align
-	stw    r4,-8(r6)
+	bge-	cr1, L(medium_27t)
+	bflr-	28
+	stw	rCHR, -4(rMEMP)	/* 8th instruction from .align */
+	stw	rCHR, -8(rMEMP)
 	blr
 
 L(medium_31t):
-	stbu   r4,-1(r6)
-	bf-    30,L(medium_30f)
+	stbu	rCHR, -1(rMEMP)
+	bf-	30, L(medium_30f)
 L(medium_30t):
-	sthu   r4,-2(r6)
-	bf-    29,L(medium_29f)
+	sthu	rCHR, -2(rMEMP)
+	bf-	29, L(medium_29f)
 L(medium_29t):
-	stwu   r4,-4(r6)
-	blt-   cr1,L(medium_27f)	# 16th instruction from .align
+	stwu	rCHR, -4(rMEMP)
+	blt-	cr1, L(medium_27f) /* 16th instruction from .align */
 L(medium_27t):
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
-	stw    r4,-12(r6)
-	stwu   r4,-16(r6)
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
+	stw	rCHR, -12(rMEMP)
+	stwu	rCHR, -16(rMEMP)
 L(medium_27f):
-	bflr-  28
+	bflr-	28
 L(medium_28t):
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
 	blr
 END(memset)
Index: strchr.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/strchr.S,v
retrieving revision 1.2
diff -u -p -r1.2 strchr.S
--- strchr.S	1999/10/11 22:23:53	1.2
+++ strchr.S	2000/06/06 21:57:54
@@ -1,5 +1,5 @@
 /* Optimized strchr implementation for PowerPC.
-   Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,91 +21,95 @@
 
 /* See strlen.s for comments on how this works.  */
 
-/* char * [r3] strchr (const char *s [r3] , int c [r4] )
+/* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
 
-   r0:	a temporary
-   r3:	our return result.
-   r4:	byte we're looking for, spread over the whole word
-   r5:	the current word
-   r6:	the constant 0xfefefeff (-0x01010101)
-   r7:	the constant 0x7f7f7f7f
-   r8:	pointer to the current word.
-   r9:	a temporary
-   r10:	the number of bits we should ignore in the first word
-   r11:	a mask with the bits to ignore set to 0
-   r12:	a temporary  */
-ENTRY(strchr)
-	rlwimi r4,r4,8,16,23
-	li   r11,-1
-	rlwimi r4,r4,16,0,15
-	lis  r6,0xfeff
-	lis  r7,0x7f7f
-	clrrwi r8,r3,2
-	addi r7,r7,0x7f7f
-	addi r6,r6,0xfffffeff
-	rlwinm r10,r3,3,27,28
+ENTRY (strchr)
+
+#define rTMP1	r0
+#define rRTN	r3	/* outgoing result */
+#define rSTRin	r3	/* incoming string arg */
+#define rCHR	r4	/* byte we're looking for, spread over the whole word */
+#define rCLZB	rCHR	/* leading zero byte count */
+#define rWORD	r5	/* the current word */
+#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f */
+#define rSTR	r8	/* current word pointer */
+#define rTMP2	r9
+#define rIGN	r10	/* number of bits we should ignore in the first word */
+#define rMASK	r11	/* mask with the bits to ignore set to 0 */
+#define rTMP3	r12
+
+	rlwimi	rCHR, rCHR, 8, 16, 23
+	li	rMASK, -1
+	rlwimi	rCHR, rCHR, 16, 0, 15
+	lis	rFEFE, -0x101
+	lis	r7F7F, 0x7f7f
+	clrrwi	rSTR, rSTRin, 2
+	addi	r7F7F, r7F7F, 0x7f7f
+	addi	rFEFE, rFEFE, -0x101
+	rlwinm	rIGN, rSTRin, 3, 27, 28
 /* Test the first (partial?) word.  */
-	lwz  r5,0(r8)
-	srw  r11,r11,r10
-	orc  r5,r5,r11
-	add  r0,r6,r5
-	nor  r9,r7,r5
-	and. r0,r0,r9
-	xor  r12,r4,r5
-	orc  r12,r12,r11
-	b    L(loopentry)
+	lwz	rWORD, 0(rSTR)
+	srw	rMASK, rMASK, rIGN
+	orc	rWORD, rWORD, rMASK
+	add	rTMP1, rFEFE, rWORD
+	nor	rTMP2, r7F7F, rWORD
+	and.	rTMP1, rTMP1, rTMP2
+	xor	rTMP3, rCHR, rWORD
+	orc	rTMP3, rTMP3, rMASK
+	b	L(loopentry)
 
 /* The loop.  */
 
-L(loop):lwzu r5,4(r8)
-	and. r0,r0,r9
-/* Test for 0.  */
-	add  r0,r6,r5
-	nor  r9,r7,r5
-	bne  L(foundit)
-	and. r0,r0,r9
+L(loop):lwzu rWORD, 4(rSTR)
+	and.	rTMP1, rTMP1, rTMP2
+/* Test for 0.  */
+	add	rTMP1, rFEFE, rWORD
+	nor	rTMP2, r7F7F, rWORD
+	bne	L(foundit)
+	and.	rTMP1, rTMP1, rTMP2
 /* Start test for the bytes we're looking for.  */
-	xor  r12,r4,r5
+	xor	rTMP3, rCHR, rWORD
 L(loopentry):
-	add  r0,r6,r12
-	nor  r9,r7,r12
-	beq  L(loop)
+	add	rTMP1, rFEFE, rTMP3
+	nor	rTMP2, r7F7F, rTMP3
+	beq	L(loop)
 /* There is a zero byte in the word, but may also be a matching byte (either
    before or after the zero byte).  In fact, we may be looking for a
    zero byte, in which case we return a match.  We guess that this hasn't
    happened, though.  */
 L(missed):
-	and. r0,r0,r9
-	li   r3,0
+	and.	rTMP1, rTMP1, rTMP2
+	li	rRTN, 0
 	beqlr
 /* It did happen. Decide which one was first...
    I'm not sure if this is actually faster than a sequence of
    rotates, compares, and branches (we use it anyway because it's shorter).  */
-	and  r6,r7,r5
-	or   r11,r7,r5
-	and  r0,r7,r12
-	or   r10,r7,r12
-	add  r6,r6,r7
-	add  r0,r0,r7
-	nor  r5,r11,r6
-	nor  r9,r10,r0
-	cmplw r5,r9
+	and	rFEFE, r7F7F, rWORD
+	or	rMASK, r7F7F, rWORD
+	and	rTMP1, r7F7F, rTMP3
+	or	rIGN, r7F7F, rTMP3
+	add	rFEFE, rFEFE, r7F7F
+	add	rTMP1, rTMP1, r7F7F
+	nor	rWORD, rMASK, rFEFE
+	nor	rTMP2, rIGN, rTMP1
+	cmplw	rWORD, rTMP2
 	bgtlr
-	cntlzw r4,r9
-	srwi r4,r4,3
-	add  r3,r8,r4
+	cntlzw	rCLZB, rTMP2
+	srwi	rCLZB, rCLZB, 3
+	add	rRTN, rSTR, rCLZB
 	blr
 
 L(foundit):
-	and  r0,r7,r12
-	or   r10,r7,r12
-	add  r0,r0,r7
-	nor  r9,r10,r0
-	cntlzw r4,r9
-	subi r8,r8,4
-	srwi r4,r4,3
-	add  r3,r8,r4
+	and	rTMP1, r7F7F, rTMP3
+	or	rIGN, r7F7F, rTMP3
+	add	rTMP1, rTMP1, r7F7F
+	nor	rTMP2, rIGN, rTMP1
+	cntlzw	rCLZB, rTMP2
+	subi	rSTR, rSTR, 4
+	srwi	rCLZB, rCLZB, 3
+	add	rRTN, rSTR, rCLZB
 	blr
-END(strchr)
+END (strchr)
 
-weak_alias(strchr,index)
+weak_alias(strchr, index)
Index: strcmp.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/strcmp.S,v
retrieving revision 1.4
diff -u -p -r1.4 strcmp.S
--- strcmp.S	2000/02/28 22:31:41	1.4
+++ strcmp.S	2000/06/06 21:57:54
@@ -21,95 +21,93 @@
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
-EALIGN(strcmp,4,0)
-/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])  */
+/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])  */
 
-/* General register assignments:
-   r0:	temporary
-   r3:	pointer to previous word in s1
-   r4:	pointer to previous word in s2
-   r5:	current word from s1
-   r6:	current word from s2
-   r7:	0xfefefeff
-   r8:	0x7f7f7f7f
-   r9:	~(word in s1 | 0x7f7f7f7f)  */
-
-/* Register assignments in the prologue:
-   r10:	low 2 bits of p2-p1
-   r11:	mask to orc with r5/r6  */
-
-	or    r0,r4,r3
-	clrlwi. r0,r0,30
-	lis   r7,0xfeff
-	bne   L(unaligned)
-
-	lwz   r5,0(r3)
-	lwz   r6,0(r4)
-	lis   r8,0x7f7f
-	addi  r7,r7,-0x101
-	addi  r8,r8,0x7f7f
-	b     L(g1)
-
-L(g0):	lwzu  r5,4(r3)
-	bne   cr1,L(different)
-	lwzu  r6,4(r4)
-L(g1):	add   r0,r7,r5
-	nor   r9,r8,r5
-	and.  r0,r0,r9
-	cmpw  cr1,r5,r6
-	beq+  L(g0)
+EALIGN (strcmp, 4, 0)
+
+#define rTMP	r0
+#define rRTN	r3	/* return value */
+#define rSTR1	r3	/* first string arg */
+#define rSTR2	r4	/* second string arg */
+#define rWORD1	r5	/* current word in s1 */
+#define rWORD2	r6	/* current word in s2 */
+#define rFEFE	r7	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r8	/* constant 0x7f7f7f7f */
+#define rNEG	r9	/* ~(word in s1 | 0x7f7f7f7f) */
+#define rBITDIF	r10	/* bits that differ in s1 & s2 words */
+
+	or	rTMP, rSTR2, rSTR1
+	clrlwi.	rTMP, rTMP, 30
+	lis	rFEFE, -0x101
+	bne	L(unaligned)
+
+	lwz	rWORD1, 0(rSTR1)
+	lwz	rWORD2, 0(rSTR2)
+	lis	r7F7F, 0x7f7f
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	b	L(g1)
+
+L(g0):	lwzu	rWORD1, 4(rSTR1)
+	bne	cr1, L(different)
+	lwzu	rWORD2, 4(rSTR2)
+L(g1):	add	rTMP, rFEFE, rWORD1
+	nor	rNEG, r7F7F, rWORD1
+	and.	rTMP, rTMP, rNEG
+	cmpw	cr1, rWORD1, rWORD2
+	beq+	L(g0)
 L(endstring):
 /* OK. We've hit the end of the string. We need to be careful that
    we don't compare two strings as different because of gunk beyond
    the end of the strings...  */
-	and   r0,r8,r5
-	beq   cr1,L(equal)
-	add   r0,r0,r8
-	xor.  r10,r5,r6
-	andc  r9,r9,r0
-	blt-  L(highbit)
-	cntlzw r10,r10
-	cntlzw r9,r9
-	addi  r9,r9,7
-	cmpw  cr1,r9,r10
-	sub   r3,r5,r6
-	bgelr+ cr1
+	and	rTMP, r7F7F, rWORD1
+	beq	cr1, L(equal)
+	add	rTMP, rTMP, r7F7F
+	xor.	rBITDIF, rWORD1, rWORD2
+	andc	rNEG, rNEG, rTMP
+	blt-	L(highbit)
+	cntlzw	rBITDIF, rBITDIF
+	cntlzw	rNEG, rNEG
+	addi	rNEG, rNEG, 7
+	cmpw	cr1, rNEG, rBITDIF
+	sub	rRTN, rWORD1, rWORD2
+	bgelr+	cr1
 L(equal):
-	li    r3,0
+	li	rRTN, 0
 	blr
 
 L(different):
-	lwz   r5,-4(r3)
-	xor.  r10,r5,r6
-	sub   r3,r5,r6
+	lwz	rWORD1, -4(rSTR1)
+	xor.	rBITDIF, rWORD1, rWORD2
+	sub	rRTN, rWORD1, rWORD2
 	bgelr+
 L(highbit):
-	ori   r3,r6,1
+	ori	rRTN, rWORD2, 1
 	blr
 
 
 /* Oh well.  In this case, we just do a byte-by-byte comparison.  */
 	.align 4
 L(unaligned):
-	lbz   r5,0(r3)
-	lbz   r6,0(r4)
-	b     L(u1)
-
-L(u0):	lbzu  r5,1(r3)
-	bne-  L(u4)
-	lbzu  r6,1(r4)
-L(u1):	cmpwi cr1,r5,0
-	beq-  cr1,L(u3)
-	cmpw  r5,r6
-	bne-  L(u3)
-	lbzu  r5,1(r3)
-	lbzu  r6,1(r4)
-	cmpwi cr1,r5,0
-	cmpw  r5,r6
-	bne+  cr1,L(u0)
-L(u3):	sub   r3,r5,r6
+	lbz	rWORD1, 0(rSTR1)
+	lbz	rWORD2, 0(rSTR2)
+	b	L(u1)
+
+L(u0):	lbzu	rWORD1, 1(rSTR1)
+	bne-	L(u4)
+	lbzu	rWORD2, 1(rSTR2)
+L(u1):	cmpwi	cr1, rWORD1, 0
+	beq-	cr1, L(u3)
+	cmpw	rWORD1, rWORD2
+	bne-	L(u3)
+	lbzu	rWORD1, 1(rSTR1)
+	lbzu	rWORD2, 1(rSTR2)
+	cmpwi	cr1, rWORD1, 0
+	cmpw	rWORD1, rWORD2
+	bne+	cr1, L(u0)
+L(u3):	sub	rRTN, rWORD1, rWORD2
 	blr
-L(u4):	lbz   r5,-1(r3)
-	sub   r3,r5,r6
+L(u4):	lbz	rWORD1, -1(rSTR1)
+	sub	rRTN, rWORD1, rWORD2
 	blr
 END(strcmp)
Index: strcpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/strcpy.S,v
retrieving revision 1.3
diff -u -p -r1.3 strcpy.S
--- strcpy.S	2000/02/28 22:33:07	1.3
+++ strcpy.S	2000/06/06 21:57:54
@@ -21,80 +21,80 @@
 
 /* See strlen.s for comments on how the end-of-string testing works.  */
 
-EALIGN(strcpy,4,0)
 /* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
 
-/* General register assignments:
-   r0:	temporary
-   r3:	saved `dest'
-   r4:	pointer to previous word in src
-   r5:	pointer to previous word in dest
-   r6:	current word from src
-   r7:	0xfefefeff
-   r8:	0x7f7f7f7f
-   r9:	~(word in src | 0x7f7f7f7f)
-   r10:	alternate word from src.  */
-
-	or    r0,r4,r3
-	clrlwi. r0,r0,30
-	addi  r5,r3,-4
-	bne   L(unaligned)
-
-	lis   r7,0xfeff
-	lis   r8,0x7f7f
-	lwz   r6,0(r4)
-	addi  r7,r7,-0x101
-	addi  r8,r8,0x7f7f
-	b     L(g2)
-
-L(g0):	lwzu  r10,4(r4)
-	stwu  r6,4(r5)
-	add   r0,r7,r10
-	nor   r9,r8,r10
-	and.  r0,r0,r9
-	bne-  L(g1)
-	lwzu  r6,4(r4)
-	stwu  r10,4(r5)
-L(g2):	add   r0,r7,r6
-	nor   r9,r8,r6
-	and.  r0,r0,r9
-	beq+  L(g0)
+EALIGN(strcpy, 4, 0)
 
-	mr    r10,r6
+#define rTMP	r0
+#define rRTN	r3	/* incoming DEST arg preserved as result */
+#define rSRC	r4	/* pointer to previous word in src */
+#define rDEST	r5	/* pointer to previous word in dest */
+#define rWORD	r6	/* current word from src */
+#define rFEFE	r7	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r8	/* constant 0x7f7f7f7f */
+#define rNEG	r9	/* ~(word in src | 0x7f7f7f7f) */
+#define rALT	r10	/* alternate word from src */
+
+	or	rTMP, rSRC, rRTN
+	clrlwi.	rTMP, rTMP, 30
+	addi	rDEST, rRTN, -4
+	bne	L(unaligned)
+
+	lis	rFEFE, -0x101
+	lis	r7F7F, 0x7f7f
+	lwz	rWORD, 0(rSRC)
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	b	L(g2)
+
+L(g0):	lwzu	rALT, 4(rSRC)
+	stwu	rWORD, 4(rDEST)
+	add	rTMP, rFEFE, rALT
+	nor	rNEG, r7F7F, rALT
+	and.	rTMP, rTMP, rNEG
+	bne-	L(g1)
+	lwzu	rWORD, 4(rSRC)
+	stwu	rALT, 4(rDEST)
+L(g2):	add	rTMP, rFEFE, rWORD
+	nor	rNEG, r7F7F, rWORD
+	and.	rTMP, rTMP, rNEG
+	beq+	L(g0)
+
+	mr	rALT, rWORD
 /* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g1):	rlwinm. r0,r10,8,24,31
-	stb   r0,4(r5)
+L(g1):	rlwinm.	rTMP, rALT, 8, 24, 31
+	stb	rTMP, 4(rDEST)
 	beqlr-
-	rlwinm. r0,r10,16,24,31
-	stb   r0,5(r5)
+	rlwinm.	rTMP, rALT, 16, 24, 31
+	stb	rTMP, 5(rDEST)
 	beqlr-
-	rlwinm. r0,r10,24,24,31
-	stb   r0,6(r5)
+	rlwinm.	rTMP, rALT, 24, 24, 31
+	stb	rTMP, 6(rDEST)
 	beqlr-
-	stb   r10,7(r5)
+	stb	rALT, 7(rDEST)
 	blr
 
 /* Oh well.  In this case, we just do a byte-by-byte copy.  */
 	.align 4
 	nop
 L(unaligned):
-	lbz   r6,0(r4)
-	addi  r5,r3,-1
-	cmpwi r6,0
-	beq-  L(u2)
-
-L(u0):	lbzu  r10,1(r4)
-	stbu  r6,1(r5)
-	cmpwi r10,0
-	beq-  L(u1)
+	lbz	rWORD, 0(rSRC)
+	addi	rDEST, rRTN, -1
+	cmpwi	rWORD, 0
+	beq-	L(u2)
+
+L(u0):	lbzu	rALT, 1(rSRC)
+	stbu	rWORD, 1(rDEST)
+	cmpwi	rALT, 0
+	beq-	L(u1)
 	nop		/* Let 601 load start of loop.  */
-	lbzu  r6,1(r4)
-	stbu  r10,1(r5)
-	cmpwi r6,0
-	bne+  L(u0)
-L(u2):	stb   r6,1(r5)
+	lbzu	rWORD, 1(rSRC)
+	stbu	rALT, 1(rDEST)
+	cmpwi	rWORD, 0
+	bne+	L(u0)
+L(u2):	stb	rWORD, 1(rDEST)
 	blr
-L(u1):	stb   r10,1(r5)
+L(u1):	stb	rALT, 1(rDEST)
 	blr
 
 END(strcpy)
Index: strlen.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/strlen.S,v
retrieving revision 1.4
diff -u -p -r1.4 strlen.S
--- strlen.S	1999/10/11 22:25:40	1.4
+++ strlen.S	2000/06/06 21:57:54
@@ -1,5 +1,5 @@
 /* Optimized strlen implementation for PowerPC.
-   Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -69,76 +69,86 @@
    We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
    them, the others we must save.  */
 
-ENTRY(strlen)
-/* On entry, r3 points to the string, and it's left that way.
-   We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
-   r4 is used to keep the current index into the string; r5 holds
-   the number of padding bits we prepend to the string to make it
-   start at a word boundary. r8 holds the 'current' word.
-   r9-12 are temporaries. r0 is used as a temporary and for discarded
-   results.  */
-	clrrwi r4,r3,2
-	lis   r7,0x7f7f
-	rlwinm r5,r3,3,27,28
-	lwz   r8,0(r4)
-	li    r9,-1
-	addi  r7,r7,0x7f7f
+/* size_t [r3] strlen (const char *s [r3])  */
+
+ENTRY (strlen)
+
+#define rTMP1	r0
+#define rRTN	r3	/* incoming STR arg, outgoing result */
+#define rSTR	r4	/* current string position */
+#define rPADN	r5	/* number of padding bits we prepend to the
+			   string to make it start at a word boundary */
+#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f */
+#define rWORD1	r8	/* current string word */
+#define rWORD2	r9	/* next string word */
+#define rMASK	r9	/* mask for first string word */
+#define rTMP2	r10
+#define rTMP3	r11
+#define rTMP4	r12
+
+	clrrwi	rSTR, rRTN, 2
+	lis	r7F7F, 0x7f7f
+	rlwinm	rPADN, rRTN, 3, 27, 28
+	lwz	rWORD1, 0(rSTR)
+	li	rMASK, -1
+	addi	r7F7F, r7F7F, 0x7f7f
 /* That's the setup done, now do the first pair of words.
    We make an exception and use method (2) on the first two words, to reduce
    overhead.  */
-	srw   r9,r9,r5
-	and   r0,r7,r8
-	or    r10,r7,r8
-	add   r0,r0,r7
-	nor   r0,r10,r0
-	and.  r8,r0,r9
-	mtcrf 0x01,r3
-	bne   L(done0)
-	lis   r6,0xfeff
-	addi  r6,r6,-0x101
+	srw	rMASK, rMASK, rPADN
+	and	rTMP1, r7F7F, rWORD1
+	or	rTMP2, r7F7F, rWORD1
+	add	rTMP1, rTMP1, r7F7F
+	nor	rTMP1, rTMP2, rTMP1
+	and.	rWORD1, rTMP1, rMASK
+	mtcrf	0x01, rRTN
+	bne	L(done0)
+	lis	rFEFE, -0x101
+	addi	rFEFE, rFEFE, -0x101
 /* Are we now aligned to a doubleword boundary?  */
-	bt    29,L(loop)
+	bt	29, L(loop)
 
 /* Handle second word of pair.  */
-	lwzu  r8,4(r4)
-	and   r0,r7,r8
-	or    r10,r7,r8
-	add   r0,r0,r7
-	nor.  r8,r10,r0
-	bne   L(done0)
+	lwzu	rWORD1, 4(rSTR)
+	and	rTMP1, r7F7F, rWORD1
+	or	rTMP2, r7F7F, rWORD1
+	add	rTMP1, rTMP1, r7F7F
+	nor.	rWORD1, rTMP2, rTMP1
+	bne	L(done0)
 
 /* The loop.  */
 
 L(loop):
-	lwz   r8,4(r4)
-	lwzu  r9,8(r4)
-	add   r0,r6,r8
-	nor   r10,r7,r8
-	and.  r0,r0,r10
-	add   r11,r6,r9
-	nor   r12,r7,r9
-	bne   L(done1)
-	and.  r0,r11,r12
-	beq   L(loop)
-
-	and   r0,r7,r9
-	add   r0,r0,r7
-	andc  r8,r12,r0
-	b     L(done0)
+	lwz	rWORD1, 4(rSTR)
+	lwzu	rWORD2, 8(rSTR)
+	add	rTMP1, rFEFE, rWORD1
+	nor	rTMP2, r7F7F, rWORD1
+	and.	rTMP1, rTMP1, rTMP2
+	add	rTMP3, rFEFE, rWORD2
+	nor	rTMP4, r7F7F, rWORD2
+	bne	L(done1)
+	and.	rTMP1, rTMP3, rTMP4
+	beq	L(loop)
+
+	and	rTMP1, r7F7F, rWORD2
+	add	rTMP1, rTMP1, r7F7F
+	andc	rWORD1, rTMP4, rTMP1
+	b	L(done0)
 
 L(done1):
-	and   r0,r7,r8
-	subi  r4,r4,4
-	add   r0,r0,r7
-	andc  r8,r10,r0
+	and	rTMP1, r7F7F, rWORD1
+	subi	rSTR, rSTR, 4
+	add	rTMP1, rTMP1, r7F7F
+	andc	rWORD1, rTMP2, rTMP1
 
-/* When we get to here, r4 points to the first word in the string that
-   contains a zero byte, and the most significant set bit in r8 is in that
+/* When we get to here, rSTR points to the first word in the string that
+   contains a zero byte, and the most significant set bit in rWORD1 is in that
    byte.  */
 L(done0):
-	cntlzw r11,r8
-	subf  r0,r3,r4
-	srwi  r11,r11,3
-	add   r3,r0,r11
+	cntlzw	rTMP3, rWORD1
+	subf	rTMP1, rRTN, rSTR
+	srwi	rTMP3, rTMP3, 3
+	add	rRTN, rTMP1, rTMP3
 	blr
-END(strlen)
+END (strlen)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]