This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/x86/optimize created. glibc-2.25-310-ga1235ff
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 11 May 2017 15:31:40 -0000
- Subject: GNU C Library master sources branch hjl/x86/optimize created. glibc-2.25-310-ga1235ff
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/x86/optimize, has been created
at a1235ffcf2833b5eda1d86f1a99c4f0fe084cc8b (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a1235ffcf2833b5eda1d86f1a99c4f0fe084cc8b
commit a1235ffcf2833b5eda1d86f1a99c4f0fe084cc8b
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed May 10 16:05:09 2017 -0700
Integrate memcpy_benchmark.cc with glibc benchtests
diff --git a/benchtests/memcpy_benchmark.cc b/benchtests/memcpy_benchmark.cc
index 51dff26..979373c 100644
--- a/benchtests/memcpy_benchmark.cc
+++ b/benchtests/memcpy_benchmark.cc
@@ -24,6 +24,13 @@
#include <map>
#include <string>
+#define TEST_MAIN
+#define TEST_NAME "memcpy"
+#include "bench-string.h"
+
+typedef char *(*proto_t) (char *, const char *, size_t);
+IMPL (memcpy, 1)
+
std::chrono::time_point<std::chrono::high_resolution_clock> start;
std::chrono::time_point<std::chrono::high_resolution_clock> stop;
size_t bytes;
@@ -35,7 +42,7 @@ int size_list[] = {1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19,
1 << 20, 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 25, 1 << 26};
size_t buffer_size = 1 << 28;
-void BM_memcpy_readwritecache(int iters, int size) {
+void BM_memcpy_readwritecache(impl_t *impl, int iters, int size) {
unsigned char * buf1 = new unsigned char [size];
unsigned char * buf2 = new unsigned char [size];
@@ -43,14 +50,14 @@ void BM_memcpy_readwritecache(int iters, int size) {
start_timing();
for (int i = 0; i < iters; ++i) {
- memcpy(buf2, buf1, size);
+ CALL(impl, buf2, buf1, size);
}
stop_timing();
delete[] buf1; delete[] buf2;
}
-void BM_memcpy_nocache(int iters, int size) {
+void BM_memcpy_nocache(impl_t *impl, int iters, int size) {
unsigned char * buf1 = new unsigned char [buffer_size];
unsigned char * buf2 = new unsigned char [buffer_size];
@@ -59,7 +66,7 @@ void BM_memcpy_nocache(int iters, int size) {
size_t offset = 0;
start_timing();
for (int i = 0; i < iters; ++i) {
- memcpy(buf2 + offset, buf1 + offset, size);
+ CALL(impl, buf2 + offset, buf1 + offset, size);
offset += std::max(4097, size + 1);
if (offset >= buffer_size - size) offset = 0;
}
@@ -68,7 +75,7 @@ void BM_memcpy_nocache(int iters, int size) {
delete[] buf1; delete[] buf2;
}
-void BM_memcpy_readcache(int iters, int size) {
+void BM_memcpy_readcache(impl_t *impl, int iters, int size) {
unsigned char * buf1 = new unsigned char [size];
unsigned char * buf2 = new unsigned char [buffer_size];
@@ -77,7 +84,7 @@ void BM_memcpy_readcache(int iters, int size) {
size_t offset = 0;
start_timing();
for (int i = 0; i < iters; ++i) {
- memcpy(buf2 + offset, buf1, size);
+ CALL(impl, buf2 + offset, buf1, size);
offset += std::max(4097, size + 1);
if (offset >= buffer_size - size) offset = 0;
}
@@ -86,30 +93,42 @@ void BM_memcpy_readcache(int iters, int size) {
delete[] buf1; delete[] buf2;
}
-double do_timing(std::function<void(int, int)> &fn, int size) {
+double do_timing(std::function<void(impl_t *, int, int)> &fn, impl_t *impl, int size) {
int iters = 2; double time = 0;
while (time < 500) {
iters *= 3;
- fn(iters, size);
+ fn(impl, iters, size);
time = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count();
bytes = (2UL * iters * size);
}
return time;
}
-std::map<std::string, std::function<void(int, int)>> schemes =
+std::map<std::string, std::function<void(impl_t *, int, int)>> schemes =
{{"Read and Write Cache", BM_memcpy_readwritecache},
{"No Cache", BM_memcpy_nocache},
{"Read Cache", BM_memcpy_readcache}};
-int main(void) {
+void test(impl_t *impl) {
std::cout << " Size (bytes) Time (msec) BW (Gbytes/sec)" << std::endl;
for (auto scheme : schemes) {
std::cout << scheme.first << std::endl;
for (auto size : size_list) {
- int time = do_timing(scheme.second, size);
+ int time = do_timing(scheme.second, impl, size);
printf("%12d %10d %10.2f\n", size, time, (bytes * 1000L / time) / 1e9);
}
std::cout << "----------------\n";
}
-}
\ No newline at end of file
+ return 0;
+}
+
+int test_main(void) {
+ test_init ();
+ FOR_EACH_IMPL (impl, 0)
+ {
+ std::cout << impl->name << std::endl;
+ test (impl);
+ }
+ return 0;
+}
+#include <support/test-driver.c>
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=946540cecbb926142a06e805e9f91a854b74f75c
commit 946540cecbb926142a06e805e9f91a854b74f75c
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed May 10 15:25:54 2017 -0700
Build memcpy_benchmark in benchtests
Compile memcpy_benchmark.cc with -fpermissive -Wno-error to silence GCC.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 7f5fda5..79fab64 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -99,6 +99,12 @@ binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
+ifneq (,$(CXX))
+binaries-bench += $(objpfx)memcpy_benchmark
+CFLAGS-memcpy_benchmark.cc = -fpermissive -Wno-error
+LDLIBS-memcpy_benchmark = -lstdc++
+endif
+
# The default duration: 10 seconds.
ifndef BENCH_DURATION
BENCH_DURATION := 10
@@ -122,6 +128,9 @@ endif
# for all these modules.
cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c) \
$(binaries-bench-malloc:=.c)
+ifneq (,$(CXX))
+cpp-srcs-left += memcpy_benchmark.cc
+endif
lib := nonlib
include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
diff --git a/string/memcpy_benchmark.cc b/benchtests/memcpy_benchmark.cc
similarity index 100%
rename from string/memcpy_benchmark.cc
rename to benchtests/memcpy_benchmark.cc
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a8d789ae9105110494e2f550d415eba9642f7b92
commit a8d789ae9105110494e2f550d415eba9642f7b92
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed May 10 14:30:08 2017 -0700
Import memcpy_benchmark.cc
From
https://gist.github.com/ekelsen/b66cc085eb39f0495b57679cdb1874fa
diff --git a/string/memcpy_benchmark.cc b/string/memcpy_benchmark.cc
new file mode 100644
index 0000000..51dff26
--- /dev/null
+++ b/string/memcpy_benchmark.cc
@@ -0,0 +1,115 @@
+/* Copyright 2017 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ======================================================================*/
+
+#if !defined DO_STRING_INLINES
+#undef __USE_STRING_INLINES
+#endif
+
+#include <string.h>
+#include <chrono>
+#include <iostream>
+#include <functional>
+#include <map>
+#include <string>
+
+std::chrono::time_point<std::chrono::high_resolution_clock> start;
+std::chrono::time_point<std::chrono::high_resolution_clock> stop;
+size_t bytes;
+
+void start_timing() { start = std::chrono::high_resolution_clock::now(); }
+void stop_timing() { stop = std::chrono::high_resolution_clock::now(); }
+
+int size_list[] = {1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19,
+ 1 << 20, 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 25, 1 << 26};
+size_t buffer_size = 1 << 28;
+
+void BM_memcpy_readwritecache(int iters, int size) {
+ unsigned char * buf1 = new unsigned char [size];
+ unsigned char * buf2 = new unsigned char [size];
+
+ memset (buf1, 0xa5, size); memset (buf2, 0x5a, size);
+
+ start_timing();
+ for (int i = 0; i < iters; ++i) {
+ memcpy(buf2, buf1, size);
+ }
+ stop_timing();
+
+ delete[] buf1; delete[] buf2;
+}
+
+void BM_memcpy_nocache(int iters, int size) {
+ unsigned char * buf1 = new unsigned char [buffer_size];
+ unsigned char * buf2 = new unsigned char [buffer_size];
+
+ memset (buf1, 0xa5, buffer_size); memset (buf2, 0x5a, buffer_size);
+
+ size_t offset = 0;
+ start_timing();
+ for (int i = 0; i < iters; ++i) {
+ memcpy(buf2 + offset, buf1 + offset, size);
+ offset += std::max(4097, size + 1);
+ if (offset >= buffer_size - size) offset = 0;
+ }
+ stop_timing();
+
+ delete[] buf1; delete[] buf2;
+}
+
+void BM_memcpy_readcache(int iters, int size) {
+ unsigned char * buf1 = new unsigned char [size];
+ unsigned char * buf2 = new unsigned char [buffer_size];
+
+ memset (buf1, 0xa5, size); memset (buf2, 0x5a, buffer_size);
+
+ size_t offset = 0;
+ start_timing();
+ for (int i = 0; i < iters; ++i) {
+ memcpy(buf2 + offset, buf1, size);
+ offset += std::max(4097, size + 1);
+ if (offset >= buffer_size - size) offset = 0;
+ }
+ stop_timing();
+
+ delete[] buf1; delete[] buf2;
+}
+
+double do_timing(std::function<void(int, int)> &fn, int size) {
+ int iters = 2; double time = 0;
+ while (time < 500) {
+ iters *= 3;
+ fn(iters, size);
+ time = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count();
+ bytes = (2UL * iters * size);
+ }
+ return time;
+}
+
+std::map<std::string, std::function<void(int, int)>> schemes =
+ {{"Read and Write Cache", BM_memcpy_readwritecache},
+ {"No Cache", BM_memcpy_nocache},
+ {"Read Cache", BM_memcpy_readcache}};
+
+int main(void) {
+ std::cout << " Size (bytes) Time (msec) BW (Gbytes/sec)" << std::endl;
+ for (auto scheme : schemes) {
+ std::cout << scheme.first << std::endl;
+ for (auto size : size_list) {
+ int time = do_timing(scheme.second, size);
+ printf("%12d %10d %10.2f\n", size, time, (bytes * 1000L / time) / 1e9);
+ }
+ std::cout << "----------------\n";
+ }
+}
\ No newline at end of file
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5fae97b3291898f9270e46adc891ed2d1a6ea1cb
commit 5fae97b3291898f9270e46adc891ed2d1a6ea1cb
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed May 10 10:21:08 2017 -0700
x86-64: Restore memcpy-sse2-unaligned.S from glibc 2.19
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 2a30538..5ed4e74 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -23,7 +23,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
memmove-avx512-unaligned-erms \
memset-avx2-unaligned-erms \
memset-avx512-unaligned-erms \
- strlen-sse4
+ strlen-sse4 memcpy-sse2-unaligned
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 1604678..653716e 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -353,6 +353,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1,
__memcpy_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1,
+ __memcpy_sse2_unaligned_2_19)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms))
/* Support sysdeps/x86_64/multiarch/mempcpy_chk.S. */
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
new file mode 100644
index 0000000..1d05c2c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -0,0 +1,171 @@
+/* memcpy with unaligned loads
+ Copyright (C) 2013-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#include "asm-syntax.h"
+
+
+ENTRY(__memcpy_sse2_unaligned_2_19)
+ movq %rsi, %rax
+ leaq (%rdx,%rdx), %rcx
+ subq %rdi, %rax
+ subq %rdx, %rax
+ cmpq %rcx, %rax
+ jb L(overlapping)
+ cmpq $16, %rdx
+ jbe L(less_16)
+ movdqu (%rsi), %xmm8
+ cmpq $32, %rdx
+ movdqu %xmm8, (%rdi)
+ movdqu -16(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -16(%rdi,%rdx)
+ ja .L31
+L(return):
+ movq %rdi, %rax
+ ret
+ .p2align 4,,10
+ .p2align 4
+.L31:
+ movdqu 16(%rsi), %xmm8
+ cmpq $64, %rdx
+ movdqu %xmm8, 16(%rdi)
+ movdqu -32(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -32(%rdi,%rdx)
+ jbe L(return)
+ movdqu 32(%rsi), %xmm8
+ cmpq $128, %rdx
+ movdqu %xmm8, 32(%rdi)
+ movdqu -48(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -48(%rdi,%rdx)
+ movdqu 48(%rsi), %xmm8
+ movdqu %xmm8, 48(%rdi)
+ movdqu -64(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -64(%rdi,%rdx)
+ jbe L(return)
+ leaq 64(%rdi), %rcx
+ addq %rdi, %rdx
+ andq $-64, %rdx
+ andq $-64, %rcx
+ movq %rcx, %rax
+ subq %rdi, %rax
+ addq %rax, %rsi
+ cmpq %rdx, %rcx
+ je L(return)
+ movq %rsi, %r10
+ subq %rcx, %r10
+ leaq 16(%r10), %r9
+ leaq 32(%r10), %r8
+ leaq 48(%r10), %rax
+ .p2align 4,,10
+ .p2align 4
+L(loop):
+ movdqu (%rcx,%r10), %xmm8
+ movdqa %xmm8, (%rcx)
+ movdqu (%rcx,%r9), %xmm8
+ movdqa %xmm8, 16(%rcx)
+ movdqu (%rcx,%r8), %xmm8
+ movdqa %xmm8, 32(%rcx)
+ movdqu (%rcx,%rax), %xmm8
+ movdqa %xmm8, 48(%rcx)
+ addq $64, %rcx
+ cmpq %rcx, %rdx
+ jne L(loop)
+ jmp L(return)
+L(overlapping):
+ cmpq %rsi, %rdi
+ jae .L3
+ testq %rdx, %rdx
+ .p2align 4,,5
+ je L(return)
+ movq %rdx, %r9
+ leaq 16(%rsi), %rcx
+ leaq 16(%rdi), %r8
+ shrq $4, %r9
+ movq %r9, %rax
+ salq $4, %rax
+ cmpq %rcx, %rdi
+ setae %cl
+ cmpq %r8, %rsi
+ setae %r8b
+ orl %r8d, %ecx
+ cmpq $15, %rdx
+ seta %r8b
+ testb %r8b, %cl
+ je .L16
+ testq %rax, %rax
+ je .L16
+ xorl %ecx, %ecx
+ xorl %r8d, %r8d
+.L7:
+ movdqu (%rsi,%rcx), %xmm8
+ addq $1, %r8
+ movdqu %xmm8, (%rdi,%rcx)
+ addq $16, %rcx
+ cmpq %r8, %r9
+ ja .L7
+ cmpq %rax, %rdx
+ je L(return)
+.L21:
+ movzbl (%rsi,%rax), %ecx
+ movb %cl, (%rdi,%rax)
+ addq $1, %rax
+ cmpq %rax, %rdx
+ ja .L21
+ jmp L(return)
+L(less_16):
+ testb $24, %dl
+ jne L(between_9_16)
+ testb $4, %dl
+ .p2align 4,,5
+ jne L(between_5_8)
+ testq %rdx, %rdx
+ .p2align 4,,2
+ je L(return)
+ movzbl (%rsi), %eax
+ testb $2, %dl
+ movb %al, (%rdi)
+ je L(return)
+ movzwl -2(%rsi,%rdx), %eax
+ movw %ax, -2(%rdi,%rdx)
+ jmp L(return)
+.L3:
+ leaq -1(%rdx), %rax
+ .p2align 4,,10
+ .p2align 4
+.L11:
+ movzbl (%rsi,%rax), %edx
+ movb %dl, (%rdi,%rax)
+ subq $1, %rax
+ jmp .L11
+L(between_9_16):
+ movq (%rsi), %rax
+ movq %rax, (%rdi)
+ movq -8(%rsi,%rdx), %rax
+ movq %rax, -8(%rdi,%rdx)
+ jmp L(return)
+.L16:
+ xorl %eax, %eax
+ jmp .L21
+L(between_5_8):
+ movl (%rsi), %eax
+ movl %eax, (%rdi)
+ movl -4(%rsi,%rdx), %eax
+ movl %eax, -4(%rdi,%rdx)
+ jmp L(return)
+END(__memcpy_sse2_unaligned_2_19)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c8d89fc5c219d206d1ac792348bb15f218ae42bb
commit c8d89fc5c219d206d1ac792348bb15f218ae42bb
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon May 1 08:32:22 2017 -0700
x86-64: Restore the old SSE4 strlen
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3736f54..2a30538 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -22,7 +22,8 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
memmove-avx-unaligned-erms \
memmove-avx512-unaligned-erms \
memset-avx2-unaligned-erms \
- memset-avx512-unaligned-erms
+ memset-avx512-unaligned-erms \
+ strlen-sse4
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 06d9a9d..1604678 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -410,6 +410,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__mempcpy_sse2_unaligned_erms)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms))
+ /* Support sysdeps/x86_64/multiarch/strlen.S. */
+ IFUNC_IMPL (i, name, strlen,
+ IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE4_2),
+ __strlen_sse42)
+ IFUNC_IMPL_ADD (array, i, strlen, 1, strlen))
+
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
diff --git a/sysdeps/x86_64/multiarch/strlen-sse4.S b/sysdeps/x86_64/multiarch/strlen-sse4.S
new file mode 100644
index 0000000..8d685df
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-sse4.S
@@ -0,0 +1,84 @@
+/* strlen with SSE4
+ Copyright (C) 2009-2013 Free Software Foundation, Inc.
+ Contributed by Ulrich Drepper <drepper@redhat.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined SHARED && !defined NOT_IN_libc
+
+#include <sysdep.h>
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (__strlen_sse42)
+ pxor %xmm1, %xmm1
+ movl %edi, %ecx
+ movq %rdi, %r8
+ andq $~15, %rdi
+ xor %edi, %ecx
+ pcmpeqb (%rdi), %xmm1
+ pmovmskb %xmm1, %edx
+ shrl %cl, %edx
+ shll %cl, %edx
+ andl %edx, %edx
+ jnz L(less16bytes)
+ pxor %xmm1, %xmm1
+
+ .p2align 4
+L(more64bytes_loop):
+ pcmpistri $0x08, 16(%rdi), %xmm1
+ jz L(more32bytes)
+
+ pcmpistri $0x08, 32(%rdi), %xmm1
+ jz L(more48bytes)
+
+ pcmpistri $0x08, 48(%rdi), %xmm1
+ jz L(more64bytes)
+
+ add $64, %rdi
+ pcmpistri $0x08, (%rdi), %xmm1
+ jnz L(more64bytes_loop)
+ leaq (%rdi,%rcx), %rax
+ subq %r8, %rax
+ ret
+
+ .p2align 4
+L(more32bytes):
+ leaq 16(%rdi,%rcx, 1), %rax
+ subq %r8, %rax
+ ret
+
+ .p2align 4
+L(more48bytes):
+ leaq 32(%rdi,%rcx, 1), %rax
+ subq %r8, %rax
+ ret
+
+ .p2align 4
+L(more64bytes):
+ leaq 48(%rdi,%rcx, 1), %rax
+ subq %r8, %rax
+ ret
+
+ .p2align 4
+L(less16bytes):
+ subq %r8, %rdi
+ bsfl %edx, %eax
+ addq %rdi, %rax
+ ret
+
+END (__strlen_sse42)
+
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5eb2c187b50fdc0849f60a966fbc0ff0f407fd3d
commit 5eb2c187b50fdc0849f60a966fbc0ff0f407fd3d
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed May 10 16:02:56 2017 -0700
Add __BEGIN_DECLS and __END_DECLS for C++
Add __BEGIN_DECLS and __END_DECLS to support C++. IFUNC_IMPL_ADD and
IFUNC_IMPL are used internally in libc. They shouldn't be used in any
programs.
* include/ifunc-impl-list.h: Add __BEGIN_DECLS and __END_DECLS.
(IFUNC_IMPL_ADD, IFUNC_IMPL): Define only if __cplusplus isn't
defined.
diff --git a/include/ifunc-impl-list.h b/include/ifunc-impl-list.h
index 22ca05f..7d53f11 100644
--- a/include/ifunc-impl-list.h
+++ b/include/ifunc-impl-list.h
@@ -22,6 +22,8 @@
#include <stdbool.h>
#include <stddef.h>
+__BEGIN_DECLS
+
struct libc_ifunc_impl
{
/* The name of function to be tested. */
@@ -32,20 +34,25 @@ struct libc_ifunc_impl
bool usable;
};
+#ifndef __cplusplus
+/* NB: IFUNC_IMPL_ADD and IFUNC_IMPL are used internally in libc. They
+ shouldn't be used in any programs. */
+
/* Add an IFUNC implementation, IMPL, for function FUNC, to ARRAY with
USABLE at index I and advance I by one. */
-#define IFUNC_IMPL_ADD(array, i, func, usable, impl) \
+# define IFUNC_IMPL_ADD(array, i, func, usable, impl) \
extern __typeof (func) impl attribute_hidden; \
(array)[i++] = (struct libc_ifunc_impl) { #impl, (void (*) (void)) impl, (usable) };
/* Return the number of IFUNC implementations, N, for function FUNC if
string NAME matches FUNC. */
-#define IFUNC_IMPL(n, name, func, ...) \
+# define IFUNC_IMPL(n, name, func, ...) \
if (strcmp (name, #func) == 0) \
{ \
__VA_ARGS__; \
return n; \
}
+#endif /* __cplusplus */
/* Fill ARRAY of MAX elements with IFUNC implementations for function
NAME and return the number of valid entries. */
@@ -53,4 +60,6 @@ extern size_t __libc_ifunc_impl_list (const char *name,
struct libc_ifunc_impl *array,
size_t max);
+__END_DECLS
+
#endif /* ifunc-impl-list.h */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0741324e81f236e5f0cf33e32f4922a397f256fd
commit 0741324e81f236e5f0cf33e32f4922a397f256fd
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed May 10 14:54:22 2017 -0700
Check __cplusplus in addition to _ISOMAC
When compiling for C++, only include <wctype/wctype.h> and nothing else.
* include/wctype.h: Check __cplusplus in addition to _ISOMAC.
diff --git a/include/wctype.h b/include/wctype.h
index a71b103..74f9f47 100644
--- a/include/wctype.h
+++ b/include/wctype.h
@@ -1,6 +1,6 @@
#ifndef _WCTYPE_H
-#ifndef _ISOMAC
+#if !defined _ISOMAC && !defined __cplusplus
/* We try to get wint_t from <stddef.h>, but not all GCC versions define it
there. So define it ourselves if it remains undefined. */
# define __need_wint_t
@@ -38,7 +38,7 @@ libc_hidden_proto (towupper)
#include <wctype/wctype.h>
-#ifndef _ISOMAC
+#if !defined _ISOMAC && !defined __cplusplus
/* Internal interfaces. */
extern int __iswspace (wint_t __wc);
extern int __iswctype (wint_t __wc, wctype_t __desc);
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources