This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
64-bit mips versions of memcpy and memset
- From: Alexandre Oliva <aoliva at redhat dot com>
- To: libc-alpha at sources dot redhat dot com
- Date: 20 Mar 2003 18:12:50 -0300
- Subject: 64-bit mips versions of memcpy and memset
- Organization: GCC Team, Red Hat
These patches implement memcpy and memset so as to take advantage of
64-bit registers available on n32 and n64. Ok?
Index: ChangeLog
from Alexandre Oliva <aoliva at redhat dot com>
* sysdeps/mips/mips64/memcpy.S, sysdeps/mips/mips64/memset.S: New.
* sysdeps/mips/memcpy.S, sysdeps/mips/memset.S: Update comments.
Index: sysdeps/mips/memcpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/memcpy.S,v
retrieving revision 1.3
diff -u -p -r1.3 memcpy.S
--- sysdeps/mips/memcpy.S 14 Mar 2003 05:31:27 -0000 1.3
+++ sysdeps/mips/memcpy.S 20 Mar 2003 00:32:38 -0000
@@ -21,12 +21,7 @@
#include <endian.h>
-/* void *memcpy(void *s1, const void *s2, size_t n);
-
- This routine could be optimized further for MIPS64, but this is left
- as an exercise for the future. When it is done, the file should be kept
- as a sisterfile to this one, and placed in the sysdeps/mips/mips64
- directory. */
+/* void *memcpy(void *s1, const void *s2, size_t n); */
#if __BYTE_ORDER == __BIG_ENDIAN
# define LWHI lwl /* high part is left in big-endian */
@@ -38,19 +33,6 @@
# define SWHI swr /* high part is right in little-endian */
# define LWLO lwl /* low part is left in little-endian */
# define SWLO swl /* low part is left in little-endian */
-#endif
-
-#ifndef t0
-# define t0 a4
-#endif
-#ifndef t1
-# define t1 a5
-#endif
-#ifndef t2
-# define t2 a6
-#endif
-#ifndef t3
-# define t3 a7
#endif
ENTRY (memcpy)
Index: sysdeps/mips/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/memset.S,v
retrieving revision 1.3
diff -u -p -r1.3 memset.S
--- sysdeps/mips/memset.S 14 Mar 2003 05:31:27 -0000 1.3
+++ sysdeps/mips/memset.S 20 Mar 2003 00:32:38 -0000
@@ -21,24 +21,12 @@
#include <endian.h>
-/* void *memset(void *s, int c, size_t n).
-
- This routine could be optimized further for MIPS64, but this is left
- as an exercise for the future. When it is done, the file should be kept
- as a sisterfile to this one, and placed in the sysdeps/mips/mips64
- directory. */
+/* void *memset(void *s, int c, size_t n). */
#if __BYTE_ORDER == __BIG_ENDIAN
# define SWHI swl /* high part is left in big-endian */
#else
# define SWHI swr /* high part is right in little-endian */
-#endif
-
-#ifndef t0
-# define t0 a4
-#endif
-#ifndef t1
-# define t1 a5
#endif
ENTRY (memset)
Index: sysdeps/mips/mips64/memcpy.S
===================================================================
RCS file: sysdeps/mips/mips64/memcpy.S
diff -N sysdeps/mips/mips64/memcpy.S
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ sysdeps/mips/mips64/memcpy.S 20 Mar 2003 00:32:38 -0000
@@ -0,0 +1,139 @@
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Hartvig Ekner <hartvige at mips dot com>, 2002.
+ Ported to mips3 n32/n64 by Alexandre Oliva <aoliva at redhat dot com>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <endian.h>
+#include <sys/asm.h>
+
+
+/* void *memcpy(void *s1, const void *s2, size_t n);
+
+ This could probably be optimized further. */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define LDHI ldl /* high part is left in big-endian */
+# define SDHI sdl /* high part is left in big-endian */
+# define LDLO ldr /* low part is right in big-endian */
+# define SDLO sdr /* low part is right in big-endian */
+#else
+# define LDHI ldr /* high part is right in little-endian */
+# define SDHI sdr /* high part is right in little-endian */
+# define LDLO ldl /* low part is left in little-endian */
+# define SDLO sdl /* low part is left in little-endian */
+#endif
+
+ENTRY (memcpy)
+ .set noreorder
+
+ slti a4, a2, 16 # Less than 16? (Aligning may eat 7 bytes)
+ bne a4, zero, L(last16)
+ move v0, a0 # Setup exit value before too late
+
+ xor a4, a1, a0 # Find a0/a1 displacement
+ andi a4, 0x7
+ bne a4, zero, L(shift) # Go handle the unaligned case
+ PTR_SUBU a5, zero, a1 # (delay slot) a5 = -src
+ andi a5, 0x7 # a0/a1 are aligned, but are we
+ beq a5, zero, L(chk8w) # starting in the middle of a dword?
+ PTR_SUBU a2, a5 # (delay slot) a2 -= bytes up to boundary
+ LDHI a4, 0(a1) # Yes we are... take care of that
+ PTR_ADDU a1, a5
+ SDHI a4, 0(a0)
+ PTR_ADDU a0, a5
+
+L(chk8w):
+ andi a4, a2, 0x3f # 64 or more bytes left?
+ beq a4, a2, L(chk1w)
+ PTR_SUBU a3, a2, a4 # Yes
+ PTR_ADDU a3, a1 # a3 = end address of loop
+ move a2, a4 # a2 = what will be left after loop
+L(lop8w):
+ ld a4, 0(a1) # Loop taking 8 doublewords at a time
+ ld a5, 8(a1)
+ ld a6, 16(a1)
+ ld a7, 24(a1)
+ ld t4, 32(a1)
+ ld t5, 40(a1)
+ ld t6, 48(a1)
+ ld t7, 56(a1)
+ PTR_ADDIU a0, 64
+ PTR_ADDIU a1, 64
+ sd a4, -64(a0)
+ sd a5, -56(a0)
+ sd a6, -48(a0)
+ sd a7, -40(a0)
+ sd t4, -32(a0)
+ sd t5, -24(a0)
+ sd t6, -16(a0)
+ bne a1, a3, L(lop8w)
+ sd t7, -8(a0) # (delay slot) last store of the group
+
+L(chk1w):
+ andi a4, a2, 0x7 # 8 or more bytes left?
+ beq a4, a2, L(last16)
+ PTR_SUBU a3, a2, a4 # Yes, handle them one dword at a time
+ PTR_ADDU a3, a1 # a3 again end address
+ move a2, a4
+L(lop1w):
+ ld a4, 0(a1)
+ PTR_ADDIU a0, 8
+ PTR_ADDIU a1, 8
+ bne a1, a3, L(lop1w)
+ sd a4, -8(a0) # (delay slot) store the dword just loaded
+
+L(last16):
+ blez a2, L(lst16e) # Handle last (< 16) bytes, one at a time
+ PTR_ADDU a3, a2, a1 # (delay slot) a3 = end of source
+L(lst16l):
+ lb a4, 0(a1)
+ PTR_ADDIU a0, 1
+ PTR_ADDIU a1, 1
+ bne a1, a3, L(lst16l)
+ sb a4, -1(a0) # (delay slot) store the byte just loaded
+L(lst16e):
+ jr ra # Bye, bye
+ nop
+
+L(shift):
+ PTR_SUBU a3, zero, a0 # Src and Dest unaligned
+ andi a3, 0x7 # (unoptimized case...)
+ beq a3, zero, L(shfa5)
+ PTR_SUBU a2, a3 # a2 = bytes left (>= 9: entry needs n >= 16)
+ LDHI a4, 0(a1) # Take care of first odd part
+ LDLO a4, 7(a1)
+ PTR_ADDU a1, a3
+ SDHI a4, 0(a0)
+ PTR_ADDU a0, a3
+L(shfa5):
+ andi a4, a2, 0x7 # a4 = tail bytes left after the dword loop
+ PTR_SUBU a3, a2, a4 # a3 = bytes for the loop; never 0 here
+ PTR_ADDU a3, a1 # a3 = end address of loop
+L(shfth):
+ LDHI a5, 0(a1) # Limp through, dword by dword
+ LDLO a5, 7(a1)
+ PTR_ADDIU a0, 8
+ PTR_ADDIU a1, 8
+ bne a1, a3, L(shfth) # do-while: needs >= 1 dword on entry
+ sd a5, -8(a0)
+ b L(last16) # Handle anything which may be left
+ move a2, a4
+
+ .set reorder
+END (memcpy)
Index: sysdeps/mips/mips64/memset.S
===================================================================
RCS file: sysdeps/mips/mips64/memset.S
diff -N sysdeps/mips/mips64/memset.S
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ sysdeps/mips/mips64/memset.S 20 Mar 2003 00:32:38 -0000
@@ -0,0 +1,91 @@
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Hartvig Ekner <hartvige at mips dot com>, 2002.
+ Ported to mips3 n32/n64 by Alexandre Oliva <aoliva at redhat dot com>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <endian.h>
+#include <sys/asm.h>
+
+
+/* void *memset(void *s, int c, size_t n);
+
+ This could probably be optimized further. */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define SDHI sdl /* high part is left in big-endian */
+#else
+# define SDHI sdr /* high part is right in little-endian */
+#endif
+
+ENTRY (memset)
+ .set noreorder
+
+ slti t5, a2, 8 # Less than 8?
+ bne t5, zero, L(last8)
+ move v0, a0 # Setup exit value before too late
+
+ beq a1, zero, L(ueven) # If zero pattern, no need to extend
+ andi a1, 0xff # Avoid problems with bogus arguments
+ dsll t4, a1, 8
+ or a1, t4 # a1 is now pattern in half word
+ dsll t4, a1, 16
+ or a1, t4 # a1 is now pattern in full word
+ dsll t4, a1, 32
+ or a1, t4 # a1 is now pattern in double word
+
+L(ueven):
+ PTR_SUBU t4, zero, a0 # Unaligned address?
+ andi t4, 0x7 # t4 = bytes up to next 8-byte boundary
+ beq t4, zero, L(chkw)
+ PTR_SUBU a2, t4 # (delay slot) a2 -= t4; no-op when t4 == 0
+ SDHI a1, 0(a0) # Yes, handle first unaligned part
+ PTR_ADDU a0, t4 # Now both a0 and a2 are updated
+
+L(chkw):
+ andi t4, a2, 0xf # Enough left for one loop iteration?
+ beq t4, a2, L(chkl) # No (a2 < 16): skip the loop
+ PTR_SUBU a3, a2, t4 # (delay slot) a3 = bytes the loop will set
+ PTR_ADDU a3, a0 # a3 is last loop address +1
+ move a2, t4 # a2 is now # of bytes left after loop
+L(loopw):
+ PTR_ADDIU a0, 16 # Handle 2 doublewords per iteration
+ sd a1, -16(a0)
+ bne a0, a3, L(loopw)
+ sd a1, -8(a0) # (delay slot) second store of the pair
+
+L(chkl):
+ andi t4, a2, 0x8 # Check if there is at least a double
+ beq t4, zero, L(last8) # word remaining after the loop
+ PTR_SUBU a2, t4 # (delay slot) a2 -= 8 if so, else a2 -= 0
+ sd a1, 0(a0) # Yes...
+ PTR_ADDIU a0, 8
+
+L(last8):
+ blez a2, L(exit) # Handle remaining (< 8) bytes (if cnt>0)
+ PTR_ADDU a3, a2, a0 # a3 is last address +1
+L(lst8l):
+ PTR_ADDIU a0, 1
+ bne a0, a3, L(lst8l)
+ sb a1, -1(a0) # (delay slot) store one pattern byte
+L(exit):
+ j ra # Bye, bye
+ nop
+
+ .set reorder
+END (memset)
--
Alexandre Oliva Enjoy Guarana', see http://www.ic.unicamp.br/~oliva/
Red Hat GCC Developer aoliva at {redhat dot com, gcc.gnu.org}
CS PhD student at IC-Unicamp oliva at {lsd dot ic dot unicamp dot br, gnu.org}
Free Software Evangelist Professional serial bug killer