This is the mail archive of the libc-ports@sources.redhat.com mailing list for the libc-ports project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[ARM] Optimised strchr and strlen


This is a strchr and strlen optimised for ARMv6T2 or ARMv7. It's against svn rev r15869
with my previous memchr patch applied.  Tested on both little- and big-endian.

(I've checked that it still applies on svn trunk, but have not retested it there; nothing
seems to have changed around there.)

Dave

2012-12-19 Dr. David Alan Gilbert <david.gilbert@linaro.org>
	* sysdeps/arm/eabi/armv6t2/strchr.S: New file
	* sysdeps/arm/eabi/armv6t2/strlen.S: New file


diff -urN ports/sysdeps/arm/eabi/armv6t2/strchr.S src/ports/sysdeps/arm/eabi/armv6t2/strchr.S
--- ports/sysdeps/arm/eabi/armv6t2/strchr.S	1970-01-01 01:00:00.000000000 +0100
+++ ports/sysdeps/arm/eabi/armv6t2/strchr.S	2011-12-16 13:43:56.704694919 +0000
@@ -0,0 +1,71 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Code contributed by Dave Gilbert <david.gilbert@linaro.org>
+ 
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+@ A very simple strchr routine, from benchmarks on A9 it's a bit faster than
+@ the current version in eglibc.
+@ While I have a version that does 8 bytes/loop and is a lot faster on long
+@ strings, it is slower on short strings, and short strings seem more common
+@ in strchr usage.
+@ Note: The use of cbz/cbnz means it's Thumb only
+
+@ 2011-02-07 david.gilbert@linaro.org
+@    Extracted from local git a5b438d861
+@ 2011-12-16 david.gilbert@linaro.org
+@    Copy from Cortex strings rev 65 and change license
+
+	.syntax unified
+
+	.text
+	.thumb
+
+@ ---------------------------------------------------------------------------
+
+	.thumb_func
+	.global strchr
+	.type strchr,%function
+ENTRY(strchr)
+	@ char *strchr(const char *s, int c) - byte-at-a-time scan.
+	@ In:  r0 = start of string, r1 = character to match (only the low 8 bits are used)
+	@ Out: r0 = pointer to the first match, or NULL for no match; r2 and flags are scratch
+	and	r1,r1, #255	@ compare as a byte: discard the upper bits of c
+
+1:
+	ldrb	r2,[r0],#1	@ r2 = *r0++ (post-increment leaves r0 one past the byte)
+	cmp	r2,r1		@ flags from this compare are consumed by the bne below
+	cbz	r2,10f		@ terminator reached; cbz does not alter the flags
+	bne	1b		@ not the wanted character - keep scanning
+
+	@ We're here if it matched
+5:
+	subs	r0,r0,#1	@ undo the post-increment so r0 points at the matching byte
+	DO_RET(lr)
+
+10:
+	@ We're here if we ran off the end
+	cmp	r1, #0	@ Corner case - you can search for the nil and get a pointer to it
+	beq	5b	@ messy, if common we should branch at the start to a special loop
+	mov	r0,#0		@ character not present: return NULL
+	DO_RET(lr)
+
+END(strchr)
+@ The routine is also made available under the name index through a weak alias.
+weak_alias (strchr, index)
+libc_hidden_builtin_def(strchr)
diff -urN ports/sysdeps/arm/eabi/armv6t2/strlen.S src/ports/sysdeps/arm/eabi/armv6t2/strlen.S
--- ports/sysdeps/arm/eabi/armv6t2/strlen.S	1970-01-01 01:00:00.000000000 +0100
+++ ports/sysdeps/arm/eabi/armv6t2/strlen.S	2011-12-16 13:43:01.991130183 +0000
@@ -0,0 +1,118 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Code contributed by Dave Gilbert <david.gilbert@linaro.org>
+ 
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+@ This strlen routine is optimised on a Cortex-A9 and should work on
+@ all ARMv7 processors.   This routine is reasonably fast for short
+@ strings, but is probably slower than a simple implementation if all
+@ your strings are very short.
+@ Note: The use of cbz/cbnz means it's Thumb only
+
+@ 2011-02-08 david.gilbert@linaro.org
+@    Extracted from local git 6848613a
+@ 2011-12-16 david.gilbert@linaro.org
+@    Copy from Cortex strings rev 65 and change license
+@    Add cfi magic, switch to ldrd
+
+
+@ CHARTSTMASK(c): one bit inside byte c (memory order) of a 00/ff mask word, in either endianness
+#ifdef __ARMEB__
+#define CHARTSTMASK(c) (1<<(31-((c)*8)))
+#else
+#define CHARTSTMASK(c) (1<<((c)*8))
+#endif
+
+@-----------------------------------------------------------------------------
+	.syntax unified
+
+	.text
+	.thumb
+
+	.thumb_func
+	.global strlen
+	.type strlen,%function
+ENTRY(strlen)
+	@ size_t strlen(const char *s): r0 = string on entry; on return r0 = count of
+	@ bytes in string not including terminator.  r4/r6 are saved and restored; r1-r3 are scratch.
+	mov	r1, r0		@ r1 = scan pointer; r0 keeps the start for the final subtract
+	push	{ r4,r6 }
+	cfi_adjust_cfa_offset (8)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r6, 4)
+
+	cfi_remember_state
+	@ NOTE(review): no cfi_restore_state pairs with the directive above in this function
+	mvns	r6, #0		@ all F
+	movs	r4, #0		@ all 0 - the other input of the sel instructions below
+	tst	r0, #7
+	beq	2f		@ already 8-byte aligned: skip the byte-at-a-time prologue
+
+1:
+	ldrb	r2, [r1], #1
+	tst	r1, #7		@ Hit alignment yet?
+	cbz	r2, 10f		@ Exit if we found the 0
+	bne	1b
+
+	@ So we're now aligned
+2:
+	ldrd	r2,r3,[r1],#8	@ load two words, advance r1 by 8
+	uadd8	r2, r2, r6	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+	sel	r2, r4, r6	@ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+	uadd8	r3, r3, r6	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
+	sel	r3, r2, r6	@ ff where the 2nd word had a 00 byte, else the 1st word mask byte from r2
+	cmp	r3, #0
+	beq	2b		@ no zero byte in either word - go round again
+
+.Lstrlen_found_zero:
+	@ One (or more) of the bytes we loaded was 0 - but which one?
+	@ r2 has the mask corresponding to the first loaded word
+	@ r3 has a combined mask of the two words - but if the first word had
+	@ no 0 byte (r2 == 0) then it is just the mask of the 2nd word
+	cmp	r2, #0
+	itte	eq
+	moveq	r2, r3		@ the end is in the 2nd word
+	subeq	r1,r1,#3	@ r1 was 8 past the pair: now the 2nd byte of the 2nd word
+	subne	r1,r1,#7	@ r1 was 8 past the pair: now the 2nd byte of the 1st word
+
+	@ r1 currently points to the 2nd byte of the word containing the 0
+	tst	r2, # CHARTSTMASK(0)	@ 1st character
+	bne	10f
+	adds	r1,r1,#1
+	tst	r2, # CHARTSTMASK(1)	@ 2nd character
+	ittt	eq
+	addeq	r1,r1,#1
+	tsteq	r2, # (3<<15)		@ bit 15 (BE) / bit 16 (LE) = 3rd character; the other bit is 2nd-character mask, 0 here
+	@ If not the 3rd must be the last one
+	addeq	r1,r1,#1
+
+10:
+	@ r0 is still at the beginning, r1 is pointing 1 byte after terminator
+	sub	r0, r1, r0
+	subs	r0, r0, #1	@ drop the terminator from the count
+	pop	{ r4, r6 }
+
+	cfi_adjust_cfa_offset (-8)
+	cfi_restore (r4)
+	cfi_restore (r6)
+
+	DO_RET(lr)
+
+END(strlen)
+libc_hidden_builtin_def (strlen)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]