This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.20-476-g882c4b9
- From: sje at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 5 Jan 2015 21:28:59 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.20-476-g882c4b9
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via 882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9 (commit)
from ac4c11f580fde4cd18ebbd6533deab0d18445db0 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9
commit 882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9
Author: Steve Ellcey <sellcey@mips.com>
Date: Mon Jan 5 13:28:42 2015 -0800
2015-01-05 Steve Ellcey <sellcey@imgtec.com>
* sysdeps/mips/memcpy.S: Add support for mips32r6/mips64r6.
diff --git a/ChangeLog b/ChangeLog
index 4674e72..2afd2b2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-01-05 Steve Ellcey <sellcey@imgtec.com>
+
+ * sysdeps/mips/memcpy.S: Add support for mips32r6/mips64r6.
+
2015-01-05 Joseph Myers <joseph@codesourcery.com>
[BZ #17796]
diff --git a/sysdeps/mips/memcpy.S b/sysdeps/mips/memcpy.S
index 6a174e6..fcd7c03 100644
--- a/sysdeps/mips/memcpy.S
+++ b/sysdeps/mips/memcpy.S
@@ -51,6 +51,13 @@
#endif
+#if __mips_isa_rev > 5
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# undef PREFETCH_STORE_HINT
+# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
+# endif
+# define R6_CODE
+#endif
/* Some asm.h files do not have the L macro definition. */
#ifndef L
@@ -79,6 +86,14 @@
# endif
#endif
+/* New R6 instructions that may not be in asm.h. */
+#ifndef PTR_LSA
+# if _MIPS_SIM == _ABI64
+# define PTR_LSA dlsa
+# else
+# define PTR_LSA lsa
+# endif
+#endif
/*
* Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
@@ -221,6 +236,7 @@
# define C_LDLO ldl /* low part is left in little-endian */
# define C_STLO sdl /* low part is left in little-endian */
# endif
+# define C_ALIGN dalign /* r6 align instruction */
#else
# define C_ST sw
# define C_LD lw
@@ -235,6 +251,7 @@
# define C_LDLO lwl /* low part is left in little-endian */
# define C_STLO swl /* low part is left in little-endian */
# endif
+# define C_ALIGN align /* r6 align instruction */
#endif
/* Bookkeeping values for 32 vs. 64 bit mode. */
@@ -285,6 +302,9 @@ L(memcpy):
#else
move v0,a0
#endif
+
+#ifndef R6_CODE
+
/*
* If src and dst have different alignments, go to L(unaligned), if they
* have the same alignment (but are not actually aligned) do a partial
@@ -305,6 +325,74 @@ L(memcpy):
C_STHI t8,0(a0)
PTR_ADDU a0,a0,a3
+#else /* R6_CODE */
+
+/*
+ * Align the destination and hope that the source gets aligned too. If it
+ * doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
+ * align instruction.
+ */
+ andi t8,a0,7
+ lapc t9,L(atable)
+ PTR_LSA t9,t8,t9,2
+ jrc t9
+L(atable):
+ bc L(lb0)
+ bc L(lb7)
+ bc L(lb6)
+ bc L(lb5)
+ bc L(lb4)
+ bc L(lb3)
+ bc L(lb2)
+ bc L(lb1)
+L(lb7):
+ lb a3, 6(a1)
+ sb a3, 6(a0)
+L(lb6):
+ lb a3, 5(a1)
+ sb a3, 5(a0)
+L(lb5):
+ lb a3, 4(a1)
+ sb a3, 4(a0)
+L(lb4):
+ lb a3, 3(a1)
+ sb a3, 3(a0)
+L(lb3):
+ lb a3, 2(a1)
+ sb a3, 2(a0)
+L(lb2):
+ lb a3, 1(a1)
+ sb a3, 1(a0)
+L(lb1):
+ lb a3, 0(a1)
+ sb a3, 0(a0)
+
+ li t9,8
+ subu t8,t9,t8
+ PTR_SUBU a2,a2,t8
+ PTR_ADDU a0,a0,t8
+ PTR_ADDU a1,a1,t8
+L(lb0):
+
+ andi t8,a1,(NSIZE-1)
+ lapc t9,L(jtable)
+ PTR_LSA t9,t8,t9,2
+ jrc t9
+L(jtable):
+ bc L(aligned)
+ bc L(r6_unaligned1)
+ bc L(r6_unaligned2)
+ bc L(r6_unaligned3)
+# ifdef USE_DOUBLE
+ bc L(r6_unaligned4)
+ bc L(r6_unaligned5)
+ bc L(r6_unaligned6)
+ bc L(r6_unaligned7)
+# endif
+#endif /* R6_CODE */
+
+L(aligned):
+
/*
* Now dst/src are both aligned to (word or double word) aligned addresses
* Set a2 to count how many bytes we have to copy after all the 64/128 byte
@@ -313,7 +401,6 @@ L(memcpy):
* equals a3.
*/
-L(aligned):
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
@@ -363,8 +450,12 @@ L(loop16w):
bgtz v1,L(skip_pref)
#endif
C_LD t1,UNIT(1)(a1)
+#ifdef R6_CODE
+ PREFETCH_FOR_STORE (2, a0)
+#else
PREFETCH_FOR_STORE (4, a0)
PREFETCH_FOR_STORE (5, a0)
+#endif
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
# ifdef USE_DOUBLE
@@ -378,8 +469,11 @@ L(skip_pref):
C_LD REG5,UNIT(5)(a1)
C_LD REG6,UNIT(6)(a1)
C_LD REG7,UNIT(7)(a1)
- PREFETCH_FOR_LOAD (4, a1)
-
+#ifdef R6_CODE
+ PREFETCH_FOR_LOAD (3, a1)
+#else
+ PREFETCH_FOR_LOAD (4, a1)
+#endif
C_ST t0,UNIT(0)(a0)
C_ST t1,UNIT(1)(a0)
C_ST REG2,UNIT(2)(a0)
@@ -397,7 +491,9 @@ L(skip_pref):
C_LD REG5,UNIT(13)(a1)
C_LD REG6,UNIT(14)(a1)
C_LD REG7,UNIT(15)(a1)
+#ifndef R6_CODE
PREFETCH_FOR_LOAD (5, a1)
+#endif
C_ST t0,UNIT(8)(a0)
C_ST t1,UNIT(9)(a0)
C_ST REG2,UNIT(10)(a0)
@@ -476,6 +572,8 @@ L(lastbloop):
L(leave):
j ra
nop
+
+#ifndef R6_CODE
/*
* UNALIGNED case, got here with a3 = "negu a0"
* This code is nearly identical to the aligned code above
@@ -510,38 +608,38 @@ L(ua_chk16w):
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
-#endif
+# endif
PREFETCH_FOR_LOAD (0, a1)
PREFETCH_FOR_LOAD (1, a1)
PREFETCH_FOR_LOAD (2, a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
PREFETCH_FOR_STORE (1, a0)
PREFETCH_FOR_STORE (2, a0)
PREFETCH_FOR_STORE (3, a0)
-#endif
-#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# endif
+# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0
bgtz v1,L(ua_skip_set)
nop
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
L(ua_skip_set):
-# else
+# else
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
+# endif
# endif
-#endif
L(ua_loop16w):
PREFETCH_FOR_LOAD (3, a1)
C_LDHI t0,UNIT(0)(a1)
C_LDHI t1,UNIT(1)(a1)
C_LDHI REG2,UNIT(2)(a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0
bgtz v1,L(ua_skip_pref)
-#endif
+# endif
C_LDHI REG3,UNIT(3)(a1)
PREFETCH_FOR_STORE (4, a0)
PREFETCH_FOR_STORE (5, a0)
@@ -667,6 +765,59 @@ L(ua_smallCopy_loop):
j ra
nop
+#else /* R6_CODE */
+
+# if __MIPSEB
+# define SWAP_REGS(X,Y) X, Y
+# define ALIGN_OFFSET(N) (N)
+# else
+# define SWAP_REGS(X,Y) Y, X
+# define ALIGN_OFFSET(N) (NSIZE-N)
+# endif
+# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
+ andi REG7, a2, (NSIZE-1);/* REG7 is # of bytes to copy byte by byte. */ \
+ beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \
+ PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \
+ /* (d)word chunks. */ \
+ move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \
+ /* after word loop is finished. */ \
+ PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \
+ PTR_SUBU REG2, a1, t8; /* REG2 is the aligned src address. */ \
+ PTR_ADDU a1, a1, a3; /* a1 is addr of source after word loop. */ \
+ C_LD t0, UNIT(0)(REG2); /* Load first part of source. */ \
+L(r6_ua_wordcopy##BYTEOFFSET): \
+ C_LD t1, UNIT(1)(REG2); /* Load second part of source. */ \
+ C_ALIGN REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET); \
+ PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \
+ PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
+ move t0, t1; /* Move second part of source to first. */ \
+ bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
+ C_ST REG3, UNIT(-1)(a0); \
+ j L(lastb); \
+ nop
+
+ /* We are generating R6 code, the destination is (d)word aligned and
+ the source is not (d)word aligned. t8 is 1 to NSIZE-1, depending on
+ the alignment of the source. */
+
+L(r6_unaligned1):
+ R6_UNALIGNED_WORD_COPY(1)
+L(r6_unaligned2):
+ R6_UNALIGNED_WORD_COPY(2)
+L(r6_unaligned3):
+ R6_UNALIGNED_WORD_COPY(3)
+# ifdef USE_DOUBLE
+L(r6_unaligned4):
+ R6_UNALIGNED_WORD_COPY(4)
+L(r6_unaligned5):
+ R6_UNALIGNED_WORD_COPY(5)
+L(r6_unaligned6):
+ R6_UNALIGNED_WORD_COPY(6)
+L(r6_unaligned7):
+ R6_UNALIGNED_WORD_COPY(7)
+# endif
+#endif /* R6_CODE */
+
.set at
.set reorder
END(MEMCPY_NAME)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 4 +
sysdeps/mips/memcpy.S | 177 +++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 168 insertions(+), 13 deletions(-)
hooks/post-receive
--
GNU C Library master sources