This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch release/2.26/master updated. glibc-2.26-87-g6850e9c
- From: raji at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 21 Nov 2017 17:11:51 -0000
- Subject: GNU C Library master sources branch release/2.26/master updated. glibc-2.26-87-g6850e9c
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, release/2.26/master has been updated
via 6850e9c6bad862a1b982f456096c54946c2aaeab (commit)
from 2767ebd8bc34c8b632ea737296200a86f57289ad (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=6850e9c6bad862a1b982f456096c54946c2aaeab
commit 6850e9c6bad862a1b982f456096c54946c2aaeab
Author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date: Tue Nov 21 22:32:20 2017 +0530
powerpc: Replace lxvd2x/stxvd2x with lvx/stvx in P7's memcpy/memmove
POWER9 DD2.1 and earlier has an issue where some cache inhibited
vector load traps to the kernel, causing a performance degradation. To
handle this in memcpy and memmove, lvx/stvx is used for aligned
addresses instead of lxvd2x/stxvd2x.
Reference: https://patchwork.ozlabs.org/patch/814059/
* sysdeps/powerpc/powerpc64/power7/memcpy.S: Replace
lxvd2x/stxvd2x with lvx/stvx.
* sysdeps/powerpc/powerpc64/power7/memmove.S: Likewise.
Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 63da5cd4a097d089033d980c42254c3356fa723f)
diff --git a/ChangeLog b/ChangeLog
index eb9204d..c72c239 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-11-21 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/memcpy.S: Replace
+ lxvd2x/stxvd2x with lvx/stvx.
+ * sysdeps/powerpc/powerpc64/power7/memmove.S: Likewise.
+
2017-10-04 Florian Weimer <fweimer@redhat.com>
* scripts/check-local-headers.sh: Ignore nspr4 header file
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index 641c7e2..c9b6507 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -91,63 +91,63 @@ L(aligned_copy):
srdi 12,cnt,7
cmpdi 12,0
beq L(aligned_tail)
- lxvd2x 6,0,src
- lxvd2x 7,src,6
+ lvx 6,0,src
+ lvx 7,src,6
mtctr 12
b L(aligned_128loop)
.align 4
L(aligned_128head):
/* for the 2nd + iteration of this loop. */
- lxvd2x 6,0,src
- lxvd2x 7,src,6
+ lvx 6,0,src
+ lvx 7,src,6
L(aligned_128loop):
- lxvd2x 8,src,7
- lxvd2x 9,src,8
- stxvd2x 6,0,dst
+ lvx 8,src,7
+ lvx 9,src,8
+ stvx 6,0,dst
addi src,src,64
- stxvd2x 7,dst,6
- stxvd2x 8,dst,7
- stxvd2x 9,dst,8
- lxvd2x 6,0,src
- lxvd2x 7,src,6
+ stvx 7,dst,6
+ stvx 8,dst,7
+ stvx 9,dst,8
+ lvx 6,0,src
+ lvx 7,src,6
addi dst,dst,64
- lxvd2x 8,src,7
- lxvd2x 9,src,8
+ lvx 8,src,7
+ lvx 9,src,8
addi src,src,64
- stxvd2x 6,0,dst
- stxvd2x 7,dst,6
- stxvd2x 8,dst,7
- stxvd2x 9,dst,8
+ stvx 6,0,dst
+ stvx 7,dst,6
+ stvx 8,dst,7
+ stvx 9,dst,8
addi dst,dst,64
bdnz L(aligned_128head)
L(aligned_tail):
mtocrf 0x01,cnt
bf 25,32f
- lxvd2x 6,0,src
- lxvd2x 7,src,6
- lxvd2x 8,src,7
- lxvd2x 9,src,8
+ lvx 6,0,src
+ lvx 7,src,6
+ lvx 8,src,7
+ lvx 9,src,8
addi src,src,64
- stxvd2x 6,0,dst
- stxvd2x 7,dst,6
- stxvd2x 8,dst,7
- stxvd2x 9,dst,8
+ stvx 6,0,dst
+ stvx 7,dst,6
+ stvx 8,dst,7
+ stvx 9,dst,8
addi dst,dst,64
32:
bf 26,16f
- lxvd2x 6,0,src
- lxvd2x 7,src,6
+ lvx 6,0,src
+ lvx 7,src,6
addi src,src,32
- stxvd2x 6,0,dst
- stxvd2x 7,dst,6
+ stvx 6,0,dst
+ stvx 7,dst,6
addi dst,dst,32
16:
bf 27,8f
- lxvd2x 6,0,src
+ lvx 6,0,src
addi src,src,16
- stxvd2x 6,0,dst
+ stvx 6,0,dst
addi dst,dst,16
8:
bf 28,4f
diff --git a/sysdeps/powerpc/powerpc64/power7/memmove.S b/sysdeps/powerpc/powerpc64/power7/memmove.S
index 93baa69..667c6e2 100644
--- a/sysdeps/powerpc/powerpc64/power7/memmove.S
+++ b/sysdeps/powerpc/powerpc64/power7/memmove.S
@@ -92,63 +92,63 @@ L(aligned_copy):
srdi 12,r5,7
cmpdi 12,0
beq L(aligned_tail)
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ lvx 6,0,r4
+ lvx 7,r4,6
mtctr 12
b L(aligned_128loop)
.align 4
L(aligned_128head):
/* for the 2nd + iteration of this loop. */
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ lvx 6,0,r4
+ lvx 7,r4,6
L(aligned_128loop):
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
- stxvd2x 6,0,r11
+ lvx 8,r4,7
+ lvx 9,r4,8
+ stvx 6,0,r11
addi r4,r4,64
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ stvx 7,r11,6
+ stvx 8,r11,7
+ stvx 9,r11,8
+ lvx 6,0,r4
+ lvx 7,r4,6
addi r11,r11,64
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
+ lvx 8,r4,7
+ lvx 9,r4,8
addi r4,r4,64
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
+ stvx 6,0,r11
+ stvx 7,r11,6
+ stvx 8,r11,7
+ stvx 9,r11,8
addi r11,r11,64
bdnz L(aligned_128head)
L(aligned_tail):
mtocrf 0x01,r5
bf 25,32f
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
+ lvx 6,0,r4
+ lvx 7,r4,6
+ lvx 8,r4,7
+ lvx 9,r4,8
addi r4,r4,64
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
+ stvx 6,0,r11
+ stvx 7,r11,6
+ stvx 8,r11,7
+ stvx 9,r11,8
addi r11,r11,64
32:
bf 26,16f
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ lvx 6,0,r4
+ lvx 7,r4,6
addi r4,r4,32
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
+ stvx 6,0,r11
+ stvx 7,r11,6
addi r11,r11,32
16:
bf 27,8f
- lxvd2x 6,0,r4
+ lvx 6,0,r4
addi r4,r4,16
- stxvd2x 6,0,r11
+ stvx 6,0,r11
addi r11,r11,16
8:
bf 28,4f
@@ -488,63 +488,63 @@ L(aligned_copy_bwd):
srdi r12,r5,7
cmpdi r12,0
beq L(aligned_tail_bwd)
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
+ lvx v6,r4,r6
+ lvx v7,r4,r7
mtctr 12
b L(aligned_128loop_bwd)
.align 4
L(aligned_128head_bwd):
/* for the 2nd + iteration of this loop. */
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
+ lvx v6,r4,r6
+ lvx v7,r4,r7
L(aligned_128loop_bwd):
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
- stxvd2x v6,r11,r6
+ lvx v8,r4,r8
+ lvx v9,r4,r9
+ stvx v6,r11,r6
subi r4,r4,64
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,7
+ stvx v7,r11,r7
+ stvx v8,r11,r8
+ stvx v9,r11,r9
+ lvx v6,r4,r6
+ lvx v7,r4,7
subi r11,r11,64
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
+ lvx v8,r4,r8
+ lvx v9,r4,r9
subi r4,r4,64
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
+ stvx v6,r11,r6
+ stvx v7,r11,r7
+ stvx v8,r11,r8
+ stvx v9,r11,r9
subi r11,r11,64
bdnz L(aligned_128head_bwd)
L(aligned_tail_bwd):
mtocrf 0x01,r5
bf 25,32f
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
+ lvx v6,r4,r6
+ lvx v7,r4,r7
+ lvx v8,r4,r8
+ lvx v9,r4,r9
subi r4,r4,64
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
+ stvx v6,r11,r6
+ stvx v7,r11,r7
+ stvx v8,r11,r8
+ stvx v9,r11,r9
subi r11,r11,64
32:
bf 26,16f
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
+ lvx v6,r4,r6
+ lvx v7,r4,r7
subi r4,r4,32
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
+ stvx v6,r11,r6
+ stvx v7,r11,r7
subi r11,r11,32
16:
bf 27,8f
- lxvd2x v6,r4,r6
+ lvx v6,r4,r6
subi r4,r4,16
- stxvd2x v6,r11,r6
+ stvx v6,r11,r6
subi r11,r11,16
8:
bf 28,4f
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 6 ++
sysdeps/powerpc/powerpc64/power7/memcpy.S | 64 +++++++-------
sysdeps/powerpc/powerpc64/power7/memmove.S | 128 ++++++++++++++--------------
3 files changed, 102 insertions(+), 96 deletions(-)
hooks/post-receive
--
GNU C Library master sources