This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch master updated. glibc-2.20-476-g882c4b9

From: sje at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 5 Jan 2015 21:28:59 -0000
Subject: GNU C Library master sources branch master updated. glibc-2.20-476-g882c4b9
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9 (commit)
      from  ac4c11f580fde4cd18ebbd6533deab0d18445db0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9

commit 882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9
Author: Steve Ellcey <sellcey@mips.com>
Date:   Mon Jan 5 13:28:42 2015 -0800

    2015-01-05  Steve Ellcey  <sellcey@imgtec.com>
    
    	* sysdeps/mips/memcpy.S: Add support for mips32r6/mips64r6.

diff --git a/ChangeLog b/ChangeLog
index 4674e72..2afd2b2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-01-05  Steve Ellcey  <sellcey@imgtec.com>
+
+	* sysdeps/mips/memcpy.S: Add support for mips32r6/mips64r6.
+
 2015-01-05  Joseph Myers  <joseph@codesourcery.com>
 
 	[BZ #17796]
diff --git a/sysdeps/mips/memcpy.S b/sysdeps/mips/memcpy.S
index 6a174e6..fcd7c03 100644
--- a/sysdeps/mips/memcpy.S
+++ b/sysdeps/mips/memcpy.S
@@ -51,6 +51,13 @@
 #endif
 
 
+#if __mips_isa_rev > 5
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+#  undef PREFETCH_STORE_HINT
+#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
+# endif
+# define R6_CODE
+#endif
 
 /* Some asm.h files do not have the L macro definition.  */
 #ifndef L
@@ -79,6 +86,14 @@
 # endif
 #endif
 
+/* New R6 instructions that may not be in asm.h.  */
+#ifndef PTR_LSA
+# if _MIPS_SIM == _ABI64
+#  define PTR_LSA	dlsa
+# else
+#  define PTR_LSA	lsa
+# endif
+#endif
 
 /*
  * Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
@@ -221,6 +236,7 @@
 #  define C_LDLO	ldl	/* low part is left in little-endian	*/
 #  define C_STLO	sdl	/* low part is left in little-endian	*/
 # endif
+# define C_ALIGN	dalign	/* r6 align instruction			*/
 #else
 # define C_ST	sw
 # define C_LD	lw
@@ -235,6 +251,7 @@
 #  define C_LDLO	lwl	/* low part is left in little-endian	*/
 #  define C_STLO	swl	/* low part is left in little-endian	*/
 # endif
+# define C_ALIGN	align	/* r6 align instruction			*/
 #endif
 
 /* Bookkeeping values for 32 vs. 64 bit mode.  */
@@ -285,6 +302,9 @@ L(memcpy):
 #else
 	move	v0,a0
 #endif
+
+#ifndef R6_CODE
+
 /*
  * If src and dst have different alignments, go to L(unaligned), if they
  * have the same alignment (but are not actually aligned) do a partial
@@ -305,6 +325,74 @@ L(memcpy):
 	C_STHI	t8,0(a0)
 	PTR_ADDU a0,a0,a3
 
+#else /* R6_CODE */
+
+/*
+ * Align the destination and hope that the source gets aligned too.  If it
+ * doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
+ * align instruction.
+ */
+	andi	t8,a0,7
+	lapc	t9,L(atable)
+	PTR_LSA	t9,t8,t9,2
+	jrc	t9
+L(atable):
+	bc	L(lb0)
+	bc	L(lb7)
+	bc	L(lb6)
+	bc	L(lb5)
+	bc	L(lb4)
+	bc	L(lb3)
+	bc	L(lb2)
+	bc	L(lb1)
+L(lb7):
+	lb	a3, 6(a1)
+	sb	a3, 6(a0)
+L(lb6):
+	lb	a3, 5(a1)
+	sb	a3, 5(a0)
+L(lb5):
+	lb	a3, 4(a1)
+	sb	a3, 4(a0)
+L(lb4):
+	lb	a3, 3(a1)
+	sb	a3, 3(a0)
+L(lb3):
+	lb	a3, 2(a1)
+	sb	a3, 2(a0)
+L(lb2):
+	lb	a3, 1(a1)
+	sb	a3, 1(a0)
+L(lb1):
+	lb	a3, 0(a1)
+	sb	a3, 0(a0)
+
+	li	t9,8
+	subu	t8,t9,t8
+	PTR_SUBU a2,a2,t8
+	PTR_ADDU a0,a0,t8
+	PTR_ADDU a1,a1,t8
+L(lb0):
+
+	andi	t8,a1,(NSIZE-1)
+	lapc	t9,L(jtable)
+	PTR_LSA	t9,t8,t9,2
+	jrc	t9
+L(jtable):
+        bc      L(aligned)
+        bc      L(r6_unaligned1)
+        bc      L(r6_unaligned2)
+        bc      L(r6_unaligned3)
+# ifdef USE_DOUBLE
+        bc      L(r6_unaligned4)
+        bc      L(r6_unaligned5)
+        bc      L(r6_unaligned6)
+        bc      L(r6_unaligned7)
+# endif
+#endif /* R6_CODE */
+
+L(aligned):
+
 /*
  * Now dst/src are both aligned to (word or double word) aligned addresses
  * Set a2 to count how many bytes we have to copy after all the 64/128 byte
@@ -313,7 +401,6 @@ L(memcpy):
  * equals a3.
  */
 
-L(aligned):
 	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
 	beq	a2,t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
 	PTR_SUBU a3,a2,t8	 /* subtract from a2 the reminder */
@@ -363,8 +450,12 @@ L(loop16w):
 	bgtz	v1,L(skip_pref)
 #endif
 	C_LD	t1,UNIT(1)(a1)
+#ifdef R6_CODE
+	PREFETCH_FOR_STORE (2, a0)
+#else
 	PREFETCH_FOR_STORE (4, a0)
 	PREFETCH_FOR_STORE (5, a0)
+#endif
 #if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
 	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
 # ifdef USE_DOUBLE
@@ -378,8 +469,11 @@ L(skip_pref):
 	C_LD	REG5,UNIT(5)(a1)
 	C_LD	REG6,UNIT(6)(a1)
 	C_LD	REG7,UNIT(7)(a1)
-        PREFETCH_FOR_LOAD (4, a1)
-
+#ifdef R6_CODE
+	PREFETCH_FOR_LOAD (3, a1)
+#else
+	PREFETCH_FOR_LOAD (4, a1)
+#endif
 	C_ST	t0,UNIT(0)(a0)
 	C_ST	t1,UNIT(1)(a0)
 	C_ST	REG2,UNIT(2)(a0)
@@ -397,7 +491,9 @@ L(skip_pref):
 	C_LD	REG5,UNIT(13)(a1)
 	C_LD	REG6,UNIT(14)(a1)
 	C_LD	REG7,UNIT(15)(a1)
+#ifndef R6_CODE
         PREFETCH_FOR_LOAD (5, a1)
+#endif
 	C_ST	t0,UNIT(8)(a0)
 	C_ST	t1,UNIT(9)(a0)
 	C_ST	REG2,UNIT(10)(a0)
@@ -476,6 +572,8 @@ L(lastbloop):
 L(leave):
 	j	ra
 	nop
+
+#ifndef R6_CODE
 /*
  * UNALIGNED case, got here with a3 = "negu a0"
  * This code is nearly identical to the aligned code above
@@ -510,38 +608,38 @@ L(ua_chk16w):
 	PTR_SUBU a3,a2,t8	 /* subtract from a2 the reminder */
 	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
 
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
 	PTR_ADDU t0,a0,a2	  /* t0 is the "past the end" address */
 	PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
-#endif
+# endif
 	PREFETCH_FOR_LOAD  (0, a1)
 	PREFETCH_FOR_LOAD  (1, a1)
 	PREFETCH_FOR_LOAD  (2, a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
 	PREFETCH_FOR_STORE (1, a0)
 	PREFETCH_FOR_STORE (2, a0)
 	PREFETCH_FOR_STORE (3, a0)
-#endif
-#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
-# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# endif
+# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
+#  if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
 	sltu    v1,t9,a0
 	bgtz    v1,L(ua_skip_set)
 	nop
 	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
 L(ua_skip_set):
-# else
+#  else
 	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
+#  endif
 # endif
-#endif
 L(ua_loop16w):
 	PREFETCH_FOR_LOAD  (3, a1)
 	C_LDHI	t0,UNIT(0)(a1)
 	C_LDHI	t1,UNIT(1)(a1)
 	C_LDHI	REG2,UNIT(2)(a1)
-#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
 	sltu	v1,t9,a0
 	bgtz	v1,L(ua_skip_pref)
-#endif
+# endif
 	C_LDHI	REG3,UNIT(3)(a1)
 	PREFETCH_FOR_STORE (4, a0)
 	PREFETCH_FOR_STORE (5, a0)
@@ -667,6 +765,59 @@ L(ua_smallCopy_loop):
 	j	ra
 	nop
 
+#else /* R6_CODE */
+
+# if __MIPSEB
+#  define SWAP_REGS(X,Y) X, Y
+#  define ALIGN_OFFSET(N) (N)
+# else
+#  define SWAP_REGS(X,Y) Y, X
+#  define ALIGN_OFFSET(N) (NSIZE-N)
+# endif
+# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
+	andi	REG7, a2, (NSIZE-1);/* REG7 is # of bytes to copy by byte. */ \
+	beq	REG7, a2, L(lastb); /* Check for bytes to copy by word	   */ \
+	PTR_SUBU a3, a2, REG7;	/* a3 is number of bytes to be copied in   */ \
+				/* (d)word chunks.			   */ \
+	move	a2, REG7;	/* a2 is # of bytes to copy byte by byte   */ \
+				/* after word loop is finished.		   */ \
+	PTR_ADDU REG6, a0, a3;	/* REG6 is the dst address after loop.	   */ \
+	PTR_SUBU REG2, a1, t8;	/* REG2 is the aligned src address.	   */ \
+	PTR_ADDU a1, a1, a3;	/* a1 is addr of source after word loop.   */ \
+	C_LD	t0, UNIT(0)(REG2);  /* Load first part of source.	   */ \
+L(r6_ua_wordcopy##BYTEOFFSET):						      \
+	C_LD	t1, UNIT(1)(REG2);  /* Load second part of source.	   */ \
+	C_ALIGN	REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET);	      \
+	PTR_ADDIU a0, a0, UNIT(1);  /* Increment destination pointer.	   */ \
+	PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
+	move	t0, t1;		/* Move second part of source to first.	   */ \
+	bne	a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET);			      \
+	C_ST	REG3, UNIT(-1)(a0);					      \
+	j	L(lastb);						      \
+	nop
+
+	/* We are generating R6 code, the destination is (d)word aligned and
+	   the source is not.  t8 is the source misalignment in bytes: 1, 2,
+	   or 3 (1 through 7 when USE_DOUBLE is defined).  */
+
+L(r6_unaligned1):
+	R6_UNALIGNED_WORD_COPY(1)
+L(r6_unaligned2):
+	R6_UNALIGNED_WORD_COPY(2)
+L(r6_unaligned3):
+	R6_UNALIGNED_WORD_COPY(3)
+# ifdef USE_DOUBLE
+L(r6_unaligned4):
+	R6_UNALIGNED_WORD_COPY(4)
+L(r6_unaligned5):
+	R6_UNALIGNED_WORD_COPY(5)
+L(r6_unaligned6):
+	R6_UNALIGNED_WORD_COPY(6)
+L(r6_unaligned7):
+	R6_UNALIGNED_WORD_COPY(7)
+# endif
+#endif /* R6_CODE */
+
 	.set	at
 	.set	reorder
 END(MEMCPY_NAME)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog             |    4 +
 sysdeps/mips/memcpy.S |  177 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 168 insertions(+), 13 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]