This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/pr22353/master created. glibc-2.26.9000-679-g75588c0
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 29 Oct 2017 16:58:36 -0000
- Subject: GNU C Library master sources branch hjl/pr22353/master created. glibc-2.26.9000-679-g75588c0
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/pr22353/master has been created
at 75588c0e4cd27ad868256d094b5f7646bc404ca8 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=75588c0e4cd27ad868256d094b5f7646bc404ca8
commit 75588c0e4cd27ad868256d094b5f7646bc404ca8
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Oct 27 06:31:57 2017 -0700
Add strcpy-stosb.S
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index bf75a99..52e0e73 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -30,7 +30,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
mempcpy-ia32 memset-ia32 strcat-ia32 strchr-ia32 \
strrchr-ia32 strcpy-ia32 strcmp-ia32 strcspn-ia32 \
strpbrk-ia32 strspn-ia32 strlen-ia32 stpcpy-ia32 \
- stpncpy-ia32
+ stpncpy-ia32 \
+ stpcpy-i386 strcpy-i386 stpcpy-stosb strcpy-stosb
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index 6e20cff..0d81691 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -147,6 +147,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__stpcpy_ssse3)
IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2),
__stpcpy_sse2)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_i386)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_stosb)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */
@@ -200,6 +202,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strcpy_ssse3)
IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2),
__strcpy_sse2)
+ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_i386)
+ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_stosb)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strcspn.S. */
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-i386.S b/sysdeps/i386/i686/multiarch/stpcpy-i386.S
new file mode 100644
index 0000000..2a25959
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/stpcpy-i386.S
@@ -0,0 +1,31 @@
+/* stpcpy optimized for i386.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define __stpcpy __stpcpy_i386 /* Name under which the included code is assembled. */
+
+#undef libc_hidden_def
+#define libc_hidden_def(ignored) /* Expands to nothing in this variant. */
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(ignored) /* Expands to nothing in this variant. */
+
+#undef weak_alias
+#define weak_alias(original, alias) /* Expands to nothing: no alias is emitted. */
+
+#include <sysdeps/i386/stpcpy.S>
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-stosb.S b/sysdeps/i386/i686/multiarch/stpcpy-stosb.S
new file mode 100644
index 0000000..2063ff8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/stpcpy-stosb.S
@@ -0,0 +1,23 @@
+/* stpcpy optimized with stosb.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define USE_AS_STPCPY /* Select the stpcpy return value in the shared source. */
+#define STRCPY __stpcpy_stosb /* Entry-point name used by ENTRY/END in the shared source. */
+
+#include <sysdeps/i386/i686/multiarch/strcpy-stosb.S>
diff --git a/sysdeps/i386/i686/multiarch/strcpy-i386.S b/sysdeps/i386/i686/multiarch/strcpy-i386.S
new file mode 100644
index 0000000..3c0b1d9
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcpy-i386.S
@@ -0,0 +1,25 @@
+/* strcpy optimized for i386.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define strcpy __strcpy_i386 /* Name under which the included code is assembled. */
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(ignored) /* Expands to nothing in this variant. */
+
+#include <sysdeps/i386/strcpy.S>
diff --git a/sysdeps/i386/i686/multiarch/strcpy-stosb.S b/sysdeps/i386/i686/multiarch/strcpy-stosb.S
new file mode 100644
index 0000000..36e4412
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcpy-stosb.S
@@ -0,0 +1,66 @@
+/* strcpy/stpcpy optimized with stosb.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define PARMS 4+8 /* return address + space for 2 saved regs */
+#define DEST PARMS /* stack offset of the destination pointer */
+#define SRC DEST+4 /* stack offset of the source pointer */
+
+#ifndef USE_AS_STPCPY
+# define STRCPY __strcpy_stosb
+#endif
+
+ .text /* Byte-at-a-time string copy; returns DEST, or the address of the copied NUL for stpcpy. */
+ENTRY (STRCPY)
+ pushl %edi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%edi, 0) /* EDI was just pushed, so it lives at 0(%esp). */
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%esi, 0)
+
+ movl DEST(%esp), %edi /* EDI = destination */
+ movl SRC(%esp), %esi /* ESI = source */
+
+#ifndef USE_AS_STPCPY
+ movl %edi, %edx /* keep the original destination for the return value */
+#endif
+
+L(repeat):
+ lodsb /* AL = next source byte, ESI advances */
+ stosb /* store AL to destination, EDI advances */
+ testb %al,%al /* was it NUL? */
+ jne L(repeat) /* no, then continue loop */
+
+#ifdef USE_AS_STPCPY
+ leal -1(%edi), %eax /* EDI is one past the stored NUL; return its address */
+#else
+ movl %edx, %eax /* return the original destination */
+#endif
+
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%esi)
+ popl %edi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%edi)
+
+ ret
+END (STRCPY)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2fa584ae979326fd9cb33e6df9c50976643cbd84
commit 2fa584ae979326fd9cb33e6df9c50976643cbd84
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Oct 27 03:09:12 2017 -0700
Add i386 strcpy.S
diff --git a/sysdeps/i386/stpcpy.S b/sysdeps/i386/stpcpy.S
index d9981b6..5e9860a 100644
--- a/sysdeps/i386/stpcpy.S
+++ b/sysdeps/i386/stpcpy.S
@@ -1,87 +1,7 @@
-/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
- For Intel 80x86, x>=3.
- Copyright (C) 1994-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper (drepper@gnu.ai.mit.edu).
+#define USE_AS_STPCPY
+#define STRCPY __stpcpy
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* This function is defined neither in ANSI nor POSIX standards but is
- also not invented here. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4 /* no space for saved regs */
-#define RTN PARMS
-#define DEST RTN
-#define SRC DEST+4
-
- .text
-ENTRY (__stpcpy)
-
- movl DEST(%esp), %eax
- movl SRC(%esp), %ecx
- subl %eax, %ecx /* magic: reduce number of loop variants
- to one using addressing mode */
-
- /* Here we would like to write
-
- subl $4, %eax
- ALIGN (4)
-
- but the assembler is too smart and optimizes for the shortest
- form where the number only needs one byte. But if we could
- have the long form we would not need the alignment. */
-
- .byte 0x81, 0xe8 /* This is `subl $0x00000004, %eax' */
- .long 0x00000004
-
- /* Four times unfolded loop with only one loop counter. This
- is achieved by the use of index+base addressing mode. As the
- loop counter we use the destination address because this is
- also the result. */
-L(1): addl $4, %eax /* increment loop counter */
-
- movb (%eax,%ecx), %dl /* load current char */
- movb %dl, (%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jz L(2) /* yes, then exit */
-
- movb 1(%eax,%ecx), %dl /* load current char */
- movb %dl, 1(%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jz L(3) /* yes, then exit */
-
- movb 2(%eax,%ecx), %dl /* load current char */
- movb %dl, 2(%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jz L(4) /* yes, then exit */
-
- movb 3(%eax,%ecx), %dl /* load current char */
- movb %dl, 3(%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jnz L(1) /* no, then continue loop */
-
- incl %eax /* correct loop counter */
-L(4): incl %eax
-L(3): incl %eax
-L(2):
-
- ret
-END (__stpcpy)
+#include <sysdeps/i386/strcpy.S>
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
diff --git a/sysdeps/i386/stpcpy.S b/sysdeps/i386/strcpy.S
similarity index 52%
copy from sysdeps/i386/stpcpy.S
copy to sysdeps/i386/strcpy.S
index d9981b6..bd00332 100644
--- a/sysdeps/i386/stpcpy.S
+++ b/sysdeps/i386/strcpy.S
@@ -18,25 +18,36 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* This function is defined neither in ANSI nor POSIX standards but is
- also not invented here. */
-
#include <sysdep.h>
#include "asm-syntax.h"
-#define PARMS 4 /* no space for saved regs */
-#define RTN PARMS
-#define DEST RTN
-#define SRC DEST+4
+#ifdef USE_AS_STPCPY
+# define PARMS 4 /* no space for saved regs */
+# define STORE_ADDR %eax
+#else
+# define PARMS 4+4 /* space for 1 saved reg */
+# define STORE_ADDR %ebx
+# define STRCPY strcpy
+#endif
+
+#define DEST PARMS
+#define SRC DEST+4
.text
-ENTRY (__stpcpy)
+ENTRY (STRCPY)
+
+#ifndef USE_AS_STPCPY
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%ebx, 0)
+#endif
- movl DEST(%esp), %eax
+ movl DEST(%esp), STORE_ADDR
movl SRC(%esp), %ecx
- subl %eax, %ecx /* magic: reduce number of loop variants
+ subl STORE_ADDR, %ecx /* magic: reduce number of loop variants
to one using addressing mode */
+#ifdef USE_AS_STPCPY
/* Here we would like to write
subl $4, %eax
@@ -48,41 +59,55 @@ ENTRY (__stpcpy)
.byte 0x81, 0xe8 /* This is `subl $0x00000004, %eax' */
.long 0x00000004
+#else
+ movl STORE_ADDR, %eax
+ subl $4, STORE_ADDR
+ .p2align 4
+#endif
/* Four times unfolded loop with only one loop counter. This
is achieved by the use of index+base addressing mode. As the
loop counter we use the destination address because this is
also the result. */
-L(1): addl $4, %eax /* increment loop counter */
-
- movb (%eax,%ecx), %dl /* load current char */
- movb %dl, (%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jz L(2) /* yes, then exit */
-
- movb 1(%eax,%ecx), %dl /* load current char */
- movb %dl, 1(%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jz L(3) /* yes, then exit */
-
- movb 2(%eax,%ecx), %dl /* load current char */
- movb %dl, 2(%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jz L(4) /* yes, then exit */
-
- movb 3(%eax,%ecx), %dl /* load current char */
- movb %dl, 3(%eax) /* and store it */
- testb %dl, %dl /* was it NUL? */
- jnz L(1) /* no, then continue loop */
-
- incl %eax /* correct loop counter */
+L(1): addl $4, STORE_ADDR /* increment loop counter */
+
+ movb (STORE_ADDR,%ecx), %dl /* load current char */
+ movb %dl, (STORE_ADDR) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(2) /* yes, then exit */
+
+ movb 1(STORE_ADDR,%ecx), %dl /* load current char */
+ movb %dl, 1(STORE_ADDR) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(3) /* yes, then exit */
+
+ movb 2(STORE_ADDR,%ecx), %dl /* load current char */
+ movb %dl, 2(STORE_ADDR) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jz L(4) /* yes, then exit */
+
+ movb 3(STORE_ADDR,%ecx), %dl /* load current char */
+ movb %dl, 3(STORE_ADDR) /* and store it */
+ testb %dl, %dl /* was it NUL? */
+ jnz L(1) /* no, then continue loop */
+
+#ifdef USE_AS_STPCPY
+ incl %eax /* correct loop counter */
L(4): incl %eax
L(3): incl %eax
L(2):
+#else
+L(4):
+L(3):
+L(2):
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%ebx)
+#endif
ret
-END (__stpcpy)
+END (STRCPY)
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
+#ifndef USE_AS_STPCPY
+libc_hidden_builtin_def (strcpy)
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d78abd26b8c8da5b9d67773343824606f9c96a6a
commit d78abd26b8c8da5b9d67773343824606f9c96a6a
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Oct 27 02:31:13 2017 -0700
i586: Use a jump table in strcpy.S [BZ #22353]
i586 strcpy.S used a clever trick with LEA to avoid a jump table:
/* ECX has the last 2 bits of the address of source - 1. */
andl $3, %ecx
call 2f
2: popl %edx
/* 0xb is the distance between 2: and 1:. */
leal 0xb(%edx,%ecx,8), %ecx
jmp *%ecx
.align 8
1: /* ECX == 0 */
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
/* ECX == 1 */
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
/* ECX == 2 */
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
/* ECX == 3 */
L(1): movl (%esi), %ecx
leal 4(%esi),%esi
This may fail if the size of any instruction before L(1): changes, because the LEA hard-codes the 0xb distance and the 8-byte stride. This patch
replaces it with a jump table, which keeps working when instructions change.
Tested on i586 and i686 with and without --disable-multi-arch.
[BZ #22353]
* sysdeps/i386/i586/strcpy.S (JMPTBL): New.
(BRANCH_TO_JMPTBL_ENTRY): Likewise.
(STRCPY): Use it.
(1): Renamed to ...
(L(Src0)): This.
(L(Src1)): New.
(L(Src2)): Likewise.
(L(1)): Renamed to ...
(L(Src3)): This.
(L(SrcTable)): New.
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index a444604..9384242 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -29,6 +29,34 @@
# define STRCPY strcpy
#endif
+#ifdef PIC
+# define JMPTBL(I, B) I - B /* Table entry: offset of label I from base B. */
+
+/* Load an entry in a jump table into EDX and branch to it. TABLE is a
+ jump table of offsets relative to TABLE itself. INDEX is a register
+ containing the index into the jump table. SCALE multiplies INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EDX. */ \
+ SETUP_PIC_REG(dx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %edx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%edx,INDEX,SCALE), %edx; \
+ /* We loaded the jump table and adjusted EDX. Go. */ \
+ jmp *%edx
+#else
+# define JMPTBL(I, B) I /* Table entry: absolute address of label I. */
+
+/* Branch to an entry in a jump table. TABLE is a jump table of
+ absolute addresses. INDEX is a register containing the index into the
+ jump table. SCALE multiplies INDEX. */
+
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
+
#define magic 0xfefefeff
.text
@@ -53,41 +81,32 @@ ENTRY (STRCPY)
cfi_rel_offset (ebx, 0)
andl $3, %ecx
-#ifdef PIC
- call 2f
- cfi_adjust_cfa_offset (4)
-2: popl %edx
- cfi_adjust_cfa_offset (-4)
- /* 0xb is the distance between 2: and 1: but we avoid writing
- 1f-2b because the assembler generates worse code. */
- leal 0xb(%edx,%ecx,8), %ecx
-#else
- leal 1f(,%ecx,8), %ecx
-#endif
-
- jmp *%ecx
+ BRANCH_TO_JMPTBL_ENTRY (L(SrcTable), %ecx, 4)
- .align 8
-1:
+ .p2align 4
+L(Src0):
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
+L(Src1):
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
+L(Src2):
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
-L(1): movl (%esi), %ecx
+L(Src3):
+ movl (%esi), %ecx
leal 4(%esi),%esi
subl %ecx, %eax
@@ -107,7 +126,7 @@ L(1): movl (%esi), %ecx
movl %edx, (%edi)
leal 4(%edi),%edi
- jmp L(1)
+ jmp L(Src3)
L(3): movl %ecx, %edx
@@ -164,6 +183,15 @@ L(end2):
ret
END (STRCPY)
+
+ .section .rodata /* Jump table used by BRANCH_TO_JMPTBL_ENTRY, indexed by ECX (0-3). */
+ .p2align 2 /* Align the table itself: must follow the section switch. */
+L(SrcTable):
+ .int JMPTBL (L(Src0), L(SrcTable))
+ .int JMPTBL (L(Src1), L(SrcTable))
+ .int JMPTBL (L(Src2), L(SrcTable))
+ .int JMPTBL (L(Src3), L(SrcTable))
+
#ifndef USE_AS_STPCPY
libc_hidden_builtin_def (strcpy)
#endif
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources