This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH v2] Fix rawmemchr regression on bulldozer
- From: Liubov Dmitrieva <liubov dot dmitrieva at gmail dot com>
- To: Ondřej Bílka <neleai at seznam dot cz>
- Cc: Andreas Jaeger <aj at suse dot com>, GNU C Library <libc-alpha at sourceware dot org>
- Date: Thu, 29 Aug 2013 18:24:00 +0400
- Subject: Re: [PATCH v2] Fix rawmemchr regression on bulldozer
- Authentication-results: sourceware.org; auth=none
- References: <20130805122624 dot GA4682 at domone dot kolej dot mff dot cuni dot cz> <521CDBF1 dot 7020005 at suse dot com> <CAHjhQ91k+j-rfCbJ9vx-B0ZmXaVUAT3X_xbGCgFcvdtP1dLkGA at mail dot gmail dot com> <521CDFC7 dot 3000102 at suse dot com> <20130829073257 dot GA5797 at domone dot kolej dot mff dot cuni dot cz> <CAHjhQ92q08P5hFPTeW3Us3LDHfUXZEU2wjDDEdcx_EuQzO5kqg at mail dot gmail dot com>
This is fine I think.
--
Liubov
>
> On Thu, Aug 29, 2013 at 11:32 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>>
>> On Tue, Aug 27, 2013 at 07:20:07PM +0200, Andreas Jaeger wrote:
>> > On 08/27/2013 07:10 PM, Liubov Dmitrieva wrote:
>> > > This patch is ok. We don't use the SSE42 version for any Intel processor,
>> > > so there are no changes here impacting Intel.
>> > > This is a good clean up.
>> >
>> > Thanks for the confirmation. Ondřej, could you update
>> > ./sysdeps/x86_64/multiarch/ifunc-impl-list.c for your change and resend
>> > both patches?
>> >
>> Here is v2 for rawmemchr
>>
>> * sysdeps/x86_64/multiarch/rawmemchr.S: Delete.
>> * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Remove rawmemchr ifunc.
>>
>> ---
>> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 --
>> sysdeps/x86_64/multiarch/rawmemchr.S | 103 -----------------------------
>> 2 files changed, 109 deletions(-)
>> delete mode 100644 sysdeps/x86_64/multiarch/rawmemchr.S
>>
>> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
>> index 28d3579..d0992e1 100644
>> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
>> @@ -61,12 +61,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> __memmove_ssse3)
>> IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
>>
>> - /* Support sysdeps/x86_64/multiarch/rawmemchr.S. */
>> - IFUNC_IMPL (i, name, rawmemchr,
>> - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE4_2,
>> - __rawmemchr_sse42)
>> - IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
>> -
>> /* Support sysdeps/x86_64/multiarch/stpncpy.S. */
>> IFUNC_IMPL (i, name, stpncpy,
>> IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSSE3,
>> diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S
>> deleted file mode 100644
>> index 50de38f..0000000
>> --- a/sysdeps/x86_64/multiarch/rawmemchr.S
>> +++ /dev/null
>> @@ -1,103 +0,0 @@
>> -/* Multiple versions of rawmemchr
>> - All versions must be listed in ifunc-impl-list.c.
>> - Copyright (C) 2009-2013 Free Software Foundation, Inc.
>> - Contributed by Ulrich Drepper <drepper@redhat.com>.
>> - This file is part of the GNU C Library.
>> -
>> - The GNU C Library is free software; you can redistribute it and/or
>> - modify it under the terms of the GNU Lesser General Public
>> - License as published by the Free Software Foundation; either
>> - version 2.1 of the License, or (at your option) any later version.
>> -
>> - The GNU C Library is distributed in the hope that it will be useful,
>> - but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> - Lesser General Public License for more details.
>> -
>> - You should have received a copy of the GNU Lesser General Public
>> - License along with the GNU C Library; if not, see
>> - <http://www.gnu.org/licenses/>. */
>> -
>> -#include <sysdep.h>
>> -#include <init-arch.h>
>> -
>> -
>> -/* Define multiple versions only for the definition in lib. */
>> -#ifndef NOT_IN_libc
>> - .text
>> -ENTRY(rawmemchr)
>> - .type rawmemchr, @gnu_indirect_function
>> - cmpl $0, __cpu_features+KIND_OFFSET(%rip)
>> - jne 1f
>> - call __init_cpu_features
>> -1: testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
>> - jnz 2f
>> - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
>> - jz 2f
>> - leaq __rawmemchr_sse42(%rip), %rax
>> - ret
>> -2: leaq __rawmemchr_sse2(%rip), %rax
>> - ret
>> -
>> -END(rawmemchr)
>> -strong_alias (rawmemchr, __rawmemchr)
>> -
>> -
>> - .section .text.sse4.2,"ax",@progbits
>> - .align 16
>> - .type __rawmemchr_sse42, @function
>> - .globl __rawmemchr_sse42
>> - .hidden __rawmemchr_sse42
>> -__rawmemchr_sse42:
>> - cfi_startproc
>> - CALL_MCOUNT
>> - movd %esi, %xmm1
>> - movq %rdi, %rcx
>> - pxor %xmm2, %xmm2
>> - andq $~15, %rdi
>> - orl $0xffffffff, %esi
>> - pshufb %xmm2, %xmm1
>> - movdqa (%rdi), %xmm0
>> - subq %rdi, %rcx
>> - pcmpeqb %xmm1, %xmm0
>> - shl %cl, %esi
>> - pmovmskb %xmm0, %ecx
>> - movl $16, %eax
>> - movl $16, %edx
>> - andl %esi, %ecx
>> - jnz 1f
>> -
>> -2: pcmpestri $0x08, 16(%rdi), %xmm1
>> - leaq 16(%rdi), %rdi
>> - jnc 2b
>> -
>> - leaq (%rdi,%rcx), %rax
>> - ret
>> -
>> -1: bsfl %ecx, %eax
>> - addq %rdi, %rax
>> - ret
>> - cfi_endproc
>> - .size __rawmemchr_sse42, .-__rawmemchr_sse42
>> -
>> -
>> -# undef ENTRY
>> -# define ENTRY(name) \
>> - .type __rawmemchr_sse2, @function; \
>> - .align 16; \
>> - .globl __rawmemchr_sse2; \
>> - .hidden __rawmemchr_sse2; \
>> - __rawmemchr_sse2: cfi_startproc; \
>> - CALL_MCOUNT
>> -# undef END
>> -# define END(name) \
>> - cfi_endproc; .size __rawmemchr_sse2, .-__rawmemchr_sse2
>> -# undef libc_hidden_builtin_def
>> -/* It doesn't make sense to send libc-internal rawmemchr calls through a PLT.
>> - The speedup we get from using SSE4.2 instruction is likely eaten away
>> - by the indirect call in the PLT. */
>> -# define libc_hidden_builtin_def(name) \
>> - .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_sse2
>> -#endif
>> -
>> -#include "../rawmemchr.S"
>> --
>> 1.8.3.2
>>
>