This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Remove Prefer_SSE_for_memop on x64


This is an important cleanup. Maintainers, please don't leave it
without attention.
There is unnecessary extra check of sse support, but actually  sse
version is always chosen on x86_64.

--
Liubov Dmitrieva
Intel Corporation

2013/2/15 Dmitrieva Liubov <liubov.dmitrieva@gmail.com>:
> Looks good to me.
>
> But I would add a sentence more after "Update" in the Change Log
> entries  to clarify what is the update about...
>
> --
> Liubov Dmitrieva
> Intel Corporation
>
>
> 2013/2/15 OndÅej BÃlka <neleai@seznam.cz>:
>> Hello,
>> As I asked in
>> http://www.sourceware.org/ml/libc-alpha/2013-02/msg00203.html
>> nobody objected to keep non-sse memset on x64.
>>
>> This patch removes that implementation and flag Prefer_SSE_for_memop
>> which is always set and only choose sse memset over nonsse one.
>>
>> 2013-02-14   OndÅej BÃlka  <neleai@seznam.cz>
>>
>>         * sysdeps/x86_64/memset.S: Always define bcopy.
>>         * sysdeps/x86_64/multiarch/Makefile: Update.
>>         * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Update.
>>         * sysdeps/x86_64/multiarch/init-arch.c: Remove Prefer_SSE_for_memop.
>>         * sysdeps/x86_64/multiarch/init-arch.h: Likewise.
>>         * sysdeps/x86_64/multiarch/bzero.S: Remove.
>>         * sysdeps/x86_64/multiarch/memset-x86-64.S: Likewise.
>>         * sysdeps/x86_64/multiarch/memset.S: Likewise.
>>         * sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
>>
>> ---
>>  sysdeps/x86_64/memset.S                    |    2 +-
>>  sysdeps/x86_64/multiarch/Makefile          |    2 +-
>>  sysdeps/x86_64/multiarch/bzero.S           |   28 ----------
>>  sysdeps/x86_64/multiarch/ifunc-impl-list.c |   11 ----
>>  sysdeps/x86_64/multiarch/init-arch.c       |   11 ----
>>  sysdeps/x86_64/multiarch/init-arch.h       |    4 --
>>  sysdeps/x86_64/multiarch/memset-x86-64.S   |   19 -------
>>  sysdeps/x86_64/multiarch/memset.S          |   79 ----------------------------
>>  sysdeps/x86_64/multiarch/memset_chk.S      |   44 ---------------
>>  9 files changed, 2 insertions(+), 198 deletions(-)
>>  delete mode 100644 sysdeps/x86_64/multiarch/bzero.S
>>  delete mode 100644 sysdeps/x86_64/multiarch/memset-x86-64.S
>>  delete mode 100644 sysdeps/x86_64/multiarch/memset.S
>>  delete mode 100644 sysdeps/x86_64/multiarch/memset_chk.S
>>
>> diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
>> index f3a4d44..b393efe 100644
>> --- a/sysdeps/x86_64/memset.S
>> +++ b/sysdeps/x86_64/memset.S
>> @@ -23,7 +23,7 @@
>>  #define __STOS_UPPER_BOUNDARY  $65536
>>
>>         .text
>> -#if !defined NOT_IN_libc && !defined USE_MULTIARCH
>> +#if !defined NOT_IN_libc
>>  ENTRY(__bzero)
>>         mov     %rsi,%rdx       /* Adjust parameter.  */
>>         xorl    %esi,%esi       /* Fill with 0s.  */
>> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
>> index dd6c27d..4f7c070 100644
>> --- a/sysdeps/x86_64/multiarch/Makefile
>> +++ b/sysdeps/x86_64/multiarch/Makefile
>> @@ -10,7 +10,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
>>                    strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
>>                    memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
>>                    memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
>> -                  strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \
>> +                  strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf \
>>                    strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
>>                    strcpy-sse2-unaligned strncpy-sse2-unaligned \
>>                    stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
>> diff --git a/sysdeps/x86_64/multiarch/bzero.S b/sysdeps/x86_64/multiarch/bzero.S
>> deleted file mode 100644
>> index 88e96ea..0000000
>> --- a/sysdeps/x86_64/multiarch/bzero.S
>> +++ /dev/null
>> @@ -1,28 +0,0 @@
>> -/* bzero.  x86-64 version.
>> -   Copyright (C) 2010-2013 Free Software Foundation, Inc.
>> -   This file is part of the GNU C Library.
>> -
>> -   The GNU C Library is free software; you can redistribute it and/or
>> -   modify it under the terms of the GNU Lesser General Public
>> -   License as published by the Free Software Foundation; either
>> -   version 2.1 of the License, or (at your option) any later version.
>> -
>> -   The GNU C Library is distributed in the hope that it will be useful,
>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> -   Lesser General Public License for more details.
>> -
>> -   You should have received a copy of the GNU Lesser General Public
>> -   License along with the GNU C Library; if not, see
>> -   <http://www.gnu.org/licenses/>.  */
>> -
>> -#include <sysdep.h>
>> -#include <init-arch.h>
>> -
>> -       .text
>> -ENTRY(__bzero)
>> -       mov     %rsi,%rdx       /* Adjust parameter.  */
>> -       xorl    %esi,%esi       /* Fill with 0s.  */
>> -       jmp     __libc_memset   /* Branch to IFUNC memset.  */
>> -END(__bzero)
>> -weak_alias (__bzero, bzero)
>> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
>> index 643cb2d..cb4aba3 100644
>> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
>> @@ -61,17 +61,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>                               __memmove_ssse3)
>>               IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
>>
>> -  /* Support sysdeps/x86_64/multiarch/memset_chk.S.  */
>> -  IFUNC_IMPL (i, name, __memset_chk,
>> -             IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
>> -             IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
>> -                             __memset_chk_x86_64))
>> -
>> -  /* Support sysdeps/x86_64/multiarch/memset.S.  */
>> -  IFUNC_IMPL (i, name, memset,
>> -             IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
>> -             IFUNC_IMPL_ADD (array, i, memset, 1, __memset_x86_64))
>> -
>>    /* Support sysdeps/x86_64/multiarch/rawmemchr.S.  */
>>    IFUNC_IMPL (i, name, rawmemchr,
>>               IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE4_2,
>> diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
>> index 992cbfb..7daaf46 100644
>> --- a/sysdeps/x86_64/multiarch/init-arch.c
>> +++ b/sysdeps/x86_64/multiarch/init-arch.c
>> @@ -58,11 +58,6 @@ __init_cpu_features (void)
>>
>>        get_common_indeces (&family, &model);
>>
>> -      /* Intel processors prefer SSE instruction for memory/string
>> -        routines if they are available.  */
>> -      __cpu_features.feature[index_Prefer_SSE_for_memop]
>> -       |= bit_Prefer_SSE_for_memop;
>> -
>>        unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
>>        unsigned int extended_family = (eax >> 20) & 0xff;
>>        unsigned int extended_model = (eax >> 12) & 0xf0;
>> @@ -125,12 +120,6 @@ __init_cpu_features (void)
>>
>>        ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
>>
>> -      /* AMD processors prefer SSE instructions for memory/string routines
>> -        if they are available, otherwise they prefer integer instructions.  */
>> -      if ((ecx & 0x200))
>> -       __cpu_features.feature[index_Prefer_SSE_for_memop]
>> -         |= bit_Prefer_SSE_for_memop;
>> -
>>        unsigned int eax;
>>        __cpuid (0x80000000, eax, ebx, ecx, edx);
>>        if (eax >= 0x80000001)
>> diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
>> index 0aece18..28edbf7 100644
>> --- a/sysdeps/x86_64/multiarch/init-arch.h
>> +++ b/sysdeps/x86_64/multiarch/init-arch.h
>> @@ -18,7 +18,6 @@
>>  #define bit_Fast_Rep_String            (1 << 0)
>>  #define bit_Fast_Copy_Backward         (1 << 1)
>>  #define bit_Slow_BSF                   (1 << 2)
>> -#define bit_Prefer_SSE_for_memop       (1 << 3)
>>  #define bit_Fast_Unaligned_Load                (1 << 4)
>>  #define bit_Prefer_PMINUB_for_stringop (1 << 5)
>>  #define bit_AVX_Usable                 (1 << 6)
>> @@ -58,7 +57,6 @@
>>  # define index_Fast_Rep_String         FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_Fast_Copy_Backward      FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_Slow_BSF                        FEATURE_INDEX_1*FEATURE_SIZE
>> -# define index_Prefer_SSE_for_memop    FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_Fast_Unaligned_Load     FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_AVX_Usable              FEATURE_INDEX_1*FEATURE_SIZE
>> @@ -157,7 +155,6 @@ extern const struct cpu_features *__get_cpu_features (void)
>>  # define index_Fast_Rep_String         FEATURE_INDEX_1
>>  # define index_Fast_Copy_Backward      FEATURE_INDEX_1
>>  # define index_Slow_BSF                        FEATURE_INDEX_1
>> -# define index_Prefer_SSE_for_memop    FEATURE_INDEX_1
>>  # define index_Fast_Unaligned_Load     FEATURE_INDEX_1
>>  # define index_AVX_Usable              FEATURE_INDEX_1
>>  # define index_FMA_Usable              FEATURE_INDEX_1
>> @@ -169,7 +166,6 @@ extern const struct cpu_features *__get_cpu_features (void)
>>  # define HAS_FAST_REP_STRING           HAS_ARCH_FEATURE (Fast_Rep_String)
>>  # define HAS_FAST_COPY_BACKWARD                HAS_ARCH_FEATURE (Fast_Copy_Backward)
>>  # define HAS_SLOW_BSF                  HAS_ARCH_FEATURE (Slow_BSF)
>> -# define HAS_PREFER_SSE_FOR_MEMOP      HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
>>  # define HAS_FAST_UNALIGNED_LOAD       HAS_ARCH_FEATURE (Fast_Unaligned_Load)
>>  # define HAS_AVX                       HAS_ARCH_FEATURE (AVX_Usable)
>>  # define HAS_FMA                       HAS_ARCH_FEATURE (FMA_Usable)
>> diff --git a/sysdeps/x86_64/multiarch/memset-x86-64.S b/sysdeps/x86_64/multiarch/memset-x86-64.S
>> deleted file mode 100644
>> index 551d105..0000000
>> --- a/sysdeps/x86_64/multiarch/memset-x86-64.S
>> +++ /dev/null
>> @@ -1,19 +0,0 @@
>> -#include <sysdep.h>
>> -
>> -#ifndef NOT_IN_libc
>> -# undef ENTRY_CHK
>> -# define ENTRY_CHK(name) \
>> -       .type __memset_chk_x86_64, @function; \
>> -       .globl __memset_chk_x86_64; \
>> -       .p2align 4; \
>> -       __memset_chk_x86_64: cfi_startproc; \
>> -       CALL_MCOUNT
>> -# undef END_CHK
>> -# define END_CHK(name) \
>> -       cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64
>> -
>> -# undef libc_hidden_builtin_def
>> -# define libc_hidden_builtin_def(name)
>> -# define memset __memset_x86_64
>> -# include "../memset.S"
>> -#endif
>> diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
>> deleted file mode 100644
>> index 7f673fa..0000000
>> --- a/sysdeps/x86_64/multiarch/memset.S
>> +++ /dev/null
>> @@ -1,79 +0,0 @@
>> -/* Multiple versions of memset
>> -   All versions must be listed in ifunc-impl-list.c.
>> -   Copyright (C) 2010-2013 Free Software Foundation, Inc.
>> -   This file is part of the GNU C Library.
>> -
>> -   The GNU C Library is free software; you can redistribute it and/or
>> -   modify it under the terms of the GNU Lesser General Public
>> -   License as published by the Free Software Foundation; either
>> -   version 2.1 of the License, or (at your option) any later version.
>> -
>> -   The GNU C Library is distributed in the hope that it will be useful,
>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> -   Lesser General Public License for more details.
>> -
>> -   You should have received a copy of the GNU Lesser General Public
>> -   License along with the GNU C Library; if not, see
>> -   <http://www.gnu.org/licenses/>.  */
>> -
>> -#include <sysdep.h>
>> -#include <init-arch.h>
>> -
>> -/* Define multiple versions only for the definition in lib.  */
>> -#ifndef NOT_IN_libc
>> -ENTRY(memset)
>> -       .type   memset, @gnu_indirect_function
>> -       cmpl    $0, __cpu_features+KIND_OFFSET(%rip)
>> -       jne     1f
>> -       call    __init_cpu_features
>> -1:     leaq    __memset_x86_64(%rip), %rax
>> -       testl   $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
>> -       jz      2f
>> -       leaq    __memset_sse2(%rip), %rax
>> -2:     ret
>> -END(memset)
>> -
>> -/* Define internal IFUNC memset for bzero.  */
>> -       .globl __libc_memset
>> -       .hidden __libc_memset
>> -       __libc_memset = memset
>> -
>> -# define USE_SSE2 1
>> -
>> -# undef ENTRY
>> -# define ENTRY(name) \
>> -       .type __memset_sse2, @function; \
>> -       .globl __memset_sse2; \
>> -       .p2align 4; \
>> -       __memset_sse2: cfi_startproc; \
>> -       CALL_MCOUNT
>> -# undef END
>> -# define END(name) \
>> -       cfi_endproc; .size __memset_sse2, .-__memset_sse2
>> -
>> -# undef ENTRY_CHK
>> -# define ENTRY_CHK(name) \
>> -       .type __memset_chk_sse2, @function; \
>> -       .globl __memset_chk_sse2; \
>> -       .p2align 4; \
>> -       __memset_chk_sse2: cfi_startproc; \
>> -       CALL_MCOUNT
>> -# undef END_CHK
>> -# define END_CHK(name) \
>> -       cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2
>> -
>> -# ifdef SHARED
>> -#  undef libc_hidden_builtin_def
>> -/* It doesn't make sense to send libc-internal memset calls through a PLT.
>> -   The speedup we get from using GPR instruction is likely eaten away
>> -   by the indirect call in the PLT.  */
>> -#  define libc_hidden_builtin_def(name) \
>> -       .globl __GI_memset; __GI_memset = __memset_sse2
>> -# endif
>> -
>> -# undef strong_alias
>> -# define strong_alias(original, alias)
>> -#endif
>> -
>> -#include "../memset.S"
>> diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
>> deleted file mode 100644
>> index 55e2635..0000000
>> --- a/sysdeps/x86_64/multiarch/memset_chk.S
>> +++ /dev/null
>> @@ -1,44 +0,0 @@
>> -/* Multiple versions of __memset_chk
>> -   All versions must be listed in ifunc-impl-list.c.
>> -   Copyright (C) 2010-2013 Free Software Foundation, Inc.
>> -   This file is part of the GNU C Library.
>> -
>> -   The GNU C Library is free software; you can redistribute it and/or
>> -   modify it under the terms of the GNU Lesser General Public
>> -   License as published by the Free Software Foundation; either
>> -   version 2.1 of the License, or (at your option) any later version.
>> -
>> -   The GNU C Library is distributed in the hope that it will be useful,
>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> -   Lesser General Public License for more details.
>> -
>> -   You should have received a copy of the GNU Lesser General Public
>> -   License along with the GNU C Library; if not, see
>> -   <http://www.gnu.org/licenses/>.  */
>> -
>> -#include <sysdep.h>
>> -#include <init-arch.h>
>> -
>> -/* Define multiple versions only for the definition in lib.  */
>> -#ifndef NOT_IN_libc
>> -# ifdef SHARED
>> -ENTRY(__memset_chk)
>> -       .type   __memset_chk, @gnu_indirect_function
>> -       cmpl    $0, __cpu_features+KIND_OFFSET(%rip)
>> -       jne     1f
>> -       call    __init_cpu_features
>> -1:     leaq    __memset_chk_x86_64(%rip), %rax
>> -       testl   $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
>> -       jz      2f
>> -       leaq    __memset_chk_sse2(%rip), %rax
>> -2:     ret
>> -END(__memset_chk)
>> -
>> -strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
>> -       .section .gnu.warning.__memset_zero_constant_len_parameter
>> -       .string "memset used with constant zero length parameter; this could be due to transposed parameters"
>> -# else
>> -#  include "../memset_chk.S"
>> -# endif
>> -#endif
>> --
>> 1.7.4.4
>>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]