This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Attached memcpy_profile result for __mempcpy_avx2_unaligned.

Thanks
Ling

2013/7/8, ling.ma.program@gmail.com <ling.ma.program@gmail.com>:
> From: Ma Ling <ling.ml@alibaba-inc.com>
>
> In this version we manage to avoid branch instructions, and force
> destination to be aligned with avx2 instruction. We modified gcc.403 so
> that we can only measure memcpy function, gcc.403 benchmarks indicate
> the version improved performance from 4% to 16% on different cases.
>
> Ondra, I will send out results from your memcpy_profile.
>
> Best Regards
> Ling
> ---
>  sysdeps/x86_64/multiarch/Makefile                 |   5 +-
>  sysdeps/x86_64/multiarch/ifunc-defines.sym        |   2 +
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c        |  11 +
>  sysdeps/x86_64/multiarch/memcpy-avx2-unaligned.S  | 438 ++++++++++++++++++++++
>  sysdeps/x86_64/multiarch/memmove-avx2-unaligned.S |   4 +
>  sysdeps/x86_64/multiarch/mempcpy-avx2-unaligned.S |   4 +
>  6 files changed, 462 insertions(+), 2 deletions(-)
>  create mode 100644 sysdeps/x86_64/multiarch/memcpy-avx2-unaligned.S
>  create mode 100644 sysdeps/x86_64/multiarch/memmove-avx2-unaligned.S
>  create mode 100644 sysdeps/x86_64/multiarch/mempcpy-avx2-unaligned.S
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index dd6c27d..02c0a2a 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -8,8 +8,9 @@ ifeq ($(subdir),string)
>
>  sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
>         strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
> -       memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
> -       memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
> +       memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back memmove-ssse3-back \
> +       memcpy-avx2-unaligned mempcpy-avx2-unaligned memmove-avx2-unaligned \
> +       strcasestr-nonascii strcasecmp_l-ssse3 \
>         strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \
>         strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
>         strcpy-sse2-unaligned strncpy-sse2-unaligned \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
> index eb1538a..448b8c4 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
> +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
> @@ -17,4 +17,6 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature)
>  FEATURE_SIZE sizeof (unsigned int)
>
>  COMMON_CPUID_INDEX_1
> +COMMON_CPUID_INDEX_7
>  FEATURE_INDEX_1
> +FEATURE_INDEX_7
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 332a60d..5fb5663 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -50,6 +50,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                        __memmove_chk_ssse3_back)
>         IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
>                        __memmove_chk_ssse3)
> +       IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_AVX2,
> +                      __memmove_chk_avx2_unaligned)
>         IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
>                        __memmove_chk_sse2))
>
> @@ -59,6 +61,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                        __memmove_ssse3_back)
>         IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
>                        __memmove_ssse3)
> +       IFUNC_IMPL_ADD (array, i, memmove, HAS_AVX2,
> +                      __memmove_avx2_unaligned)
>         IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
>
>    /* Support sysdeps/x86_64/multiarch/memset_chk.S.  */
> @@ -235,6 +239,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                        __memcpy_chk_ssse3_back)
>         IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
>                        __memcpy_chk_ssse3)
> +       IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_AVX2,
> +                      __memcpy_chk_avx2_unaligned)
>         IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
>                        __memcpy_chk_sse2))
>
> @@ -243,6 +249,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>         IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
>                        __memcpy_ssse3_back)
>         IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
> +       IFUNC_IMPL_ADD (array, i, memcpy, HAS_AVX2, __memcpy_avx2_unaligned)
>         IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))
>
>    /* Support sysdeps/x86_64/multiarch/mempcpy_chk.S.  */
> @@ -251,6 +258,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                        __mempcpy_chk_ssse3_back)
>         IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
>                        __mempcpy_chk_ssse3)
> +       IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_AVX2,
> +                      __mempcpy_chk_avx2_unaligned)
>         IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
>                        __mempcpy_chk_sse2))
>
> @@ -260,6 +269,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                        __mempcpy_ssse3_back)
>         IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
>                        __mempcpy_ssse3)
> +       IFUNC_IMPL_ADD (array, i, mempcpy, HAS_AVX2,
> +                      __mempcpy_avx2_unaligned)
>         IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
>
>    /* Support sysdeps/x86_64/multiarch/strlen.S.  */
> diff --git a/sysdeps/x86_64/multiarch/memcpy-avx2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx2-unaligned.S
> new file mode 100644
> index 0000000..d32cfad
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memcpy-avx2-unaligned.S
> @@ -0,0 +1,438 @@
> +/* memcpy with AVX2
> +   Copyright (C) 2010 Free Software Foundation, Inc.
> +   Contributed by Intel Corporation.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +#if !defined NOT_IN_libc \
> +    && (defined SHARED \
> +        || defined USE_AS_MEMMOVE \
> +        || !defined USE_MULTIARCH)
> +
> +#include "asm-syntax.h"
> +
> +#ifndef MEMCPY
> +# define MEMCPY __memcpy_avx2_unaligned
> +# define MEMCPY_CHK __memcpy_chk_avx2_unaligned
> +#endif
> +
> +#ifndef L
> +# define L(label) .L##label
> +#endif
> +
> +#ifndef ALIGN
> +# define ALIGN(n) .p2align n
> +#endif
> +
> +#ifndef cfi_startproc
> +# define cfi_startproc .cfi_startproc
> +#endif
> +
> +#ifndef cfi_endproc
> +# define cfi_endproc .cfi_endproc
> +#endif
> +
> +#ifndef ENTRY
> +# define ENTRY(name) \
> +        .type name, @function; \
> +        .globl name; \
> +        ALIGN(4); \
> +name: \
> +        cfi_startproc
> +#endif
> +
> +#ifndef END
> +# define END(name) \
> +        cfi_endproc; \
> +        .size name, .-name
> +#endif
> +
> +        .section .text.avx2,"ax",@progbits
> +#if !defined USE_AS_BCOPY
> +ENTRY (MEMCPY_CHK)
> +        cmpq %rdx, %rcx
> +        jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (MEMCPY_CHK)
> +#endif
> +
> +ENTRY (MEMCPY)
> +        vzeroupper
> +        mov %rdi, %rax
> +
> +#ifdef USE_AS_MEMPCPY
> +        add %rdx, %rax
> +#endif
> +
> +        lea (%rsi, %rdx), %r8
> +        lea (%rdi, %rdx), %r9
> +        cmp $256, %rdx
> +        ja L(256bytesormore)
> +        cmp $128, %edx
> +        jb L(less_128bytes)
> +        vmovups (%rsi), %xmm0
> +        vmovups 0x10(%rsi), %xmm1
> +        vmovups 0x20(%rsi), %xmm2
> +        vmovups 0x30(%rsi), %xmm3
> +        vmovups 0x40(%rsi), %xmm4
> +        vmovups 0x50(%rsi), %xmm5
> +        vmovups 0x60(%rsi), %xmm6
> +        vmovups 0x70(%rsi), %xmm7
> +        vmovups -0x80(%r8), %xmm8
> +        vmovups -0x70(%r8), %xmm9
> +        vmovups -0x60(%r8), %xmm10
> +        vmovups -0x50(%r8), %xmm11
> +        vmovups -0x40(%r8), %xmm12
> +        vmovups -0x30(%r8), %xmm13
> +        vmovups -0x20(%r8), %xmm14
> +        vmovups -0x10(%r8), %xmm15
> +        vmovups %xmm0, (%rdi)
> +        vmovups %xmm1, 0x10(%rdi)
> +        vmovups %xmm2, 0x20(%rdi)
> +        vmovups %xmm3, 0x30(%rdi)
> +        vmovups %xmm4, 0x40(%rdi)
> +        vmovups %xmm5, 0x50(%rdi)
> +        vmovups %xmm6, 0x60(%rdi)
> +        vmovups %xmm7, 0x70(%rdi)
> +        vmovups %xmm8, -0x80(%r9)
> +        vmovups %xmm9, -0x70(%r9)
> +        vmovups %xmm10, -0x60(%r9)
> +        vmovups %xmm11, -0x50(%r9)
> +        vmovups %xmm12, -0x40(%r9)
> +        vmovups %xmm13, -0x30(%r9)
> +        vmovups %xmm14, -0x20(%r9)
> +        vmovups %xmm15, -0x10(%r9)
> +        ret
> +        ALIGN(4)
> +L(less_128bytes):
> +        cmp $64, %edx
> +        jb L(less_64bytes)
> +        vmovups (%rsi), %xmm0
> +        vmovups 0x10(%rsi), %xmm1
> +        vmovups 0x20(%rsi), %xmm2
> +        vmovups 0x30(%rsi), %xmm3
> +        vmovups -0x40(%r8), %xmm4
> +        vmovups -0x30(%r8), %xmm5
> +        vmovups -0x20(%r8), %xmm6
> +        vmovups -0x10(%r8), %xmm7
> +        vmovups %xmm0, (%rdi)
> +        vmovups %xmm1, 0x10(%rdi)
> +        vmovups %xmm2, 0x20(%rdi)
> +        vmovups %xmm3, 0x30(%rdi)
> +        vmovups %xmm4, -0x40(%r9)
> +        vmovups %xmm5, -0x30(%r9)
> +        vmovups %xmm6, -0x20(%r9)
> +        vmovups %xmm7, -0x10(%r9)
> +        ret
> +        ALIGN(4)
> +L(less_64bytes):
> +        cmp $32, %edx
> +        jb L(less_32bytes)
> +        vmovups (%rsi), %xmm0
> +        vmovups 0x10(%rsi), %xmm1
> +        vmovups -0x20(%r8), %xmm6
> +        vmovups -0x10(%r8), %xmm7
> +        vmovups %xmm0, (%rdi)
> +        vmovups %xmm1, 0x10(%rdi)
> +        vmovups %xmm6, -0x20(%r9)
> +        vmovups %xmm7, -0x10(%r9)
> +        retq
> +        ALIGN(4)
> +L(less_32bytes):
> +        cmp $16, %edx
> +        jb L(less_16bytes)
> +        vmovups (%rsi), %xmm0
> +        vmovups -0x10(%r8), %xmm7
> +        vmovups %xmm0, (%rdi)
> +        vmovups %xmm7, -0x10(%r9)
> +        retq
> +        ALIGN(4)
> +L(less_16bytes):
> +        cmp $8, %edx
> +        jb L(less_8bytes)
> +        movq (%rsi), %rcx
> +        movq -0x08(%r8), %r10
> +        movq %rcx, (%rdi)
> +        movq %r10, -0x08(%r9)
> +        retq
> +        ALIGN(4)
> +L(less_8bytes):
> +        cmp $4, %edx
> +        jb L(less_4bytes)
> +        mov (%rsi), %ecx
> +        mov -0x04(%r8), %edx
> +        mov %ecx, (%rdi)
> +        mov %edx, -0x04(%r9)
> +        ret
> +        ALIGN(4)
> +L(less_4bytes):
> +        cmp $2, %edx
> +        jb L(less_2bytes)
> +        mov (%rsi), %cx
> +        mov -0x02(%r8), %dx
> +        mov %cx, (%rdi)
> +        mov %dx, -0x02(%r9)
> +        ret
> +        ALIGN(4)
> +L(less_2bytes):
> +        cmp $1, %rdx
> +        jb L(less_0bytes)
> +        mov (%rsi), %cl
> +        mov %cl, (%rdi)
> +L(less_0bytes):
> +        retq
> +
> +        ALIGN(4)
> +L(256bytesormore):
> +
> +#ifdef USE_AS_MEMMOVE
> +        cmp %rsi, %rdi
> +        jae L(copy_backward)
> +#endif
> +        cmp $2048, %rdx
> +        jae L(gobble_data_movsb)
> +
> +        vmovups -0x80(%r8), %xmm8
> +        vmovups -0x70(%r8), %xmm9
> +        vmovups -0x60(%r8), %xmm10
> +        vmovups -0x50(%r8), %xmm11
> +        vmovups -0x40(%r8), %xmm12
> +        vmovups -0x30(%r8), %xmm13
> +        vmovups -0x20(%r8), %xmm14
> +        vmovups -0x10(%r8), %xmm15
> +        vmovups (%rsi), %ymm4
> +        mov %rdi, %r10
> +        and $-32, %rdi
> +        add $32, %rdi
> +        mov %rdi, %r11
> +        sub %r10, %r11
> +        sub %r11, %rdx
> +        add %r11, %rsi
> +        sub $0x80, %rdx
> +L(goble_128_loop):
> +        vmovups (%rsi), %ymm0
> +        vmovups 0x20(%rsi), %ymm1
> +        vmovups 0x40(%rsi), %ymm2
> +        vmovups 0x60(%rsi), %ymm3
> +        lea 0x80(%rsi), %rsi
> +        vmovaps %ymm0, (%rdi)
> +        vmovaps %ymm1, 0x20(%rdi)
> +        vmovaps %ymm2, 0x40(%rdi)
> +        vmovaps %ymm3, 0x60(%rdi)
> +        lea 0x80(%rdi), %rdi
> +        sub $0x80, %rdx
> +        jae L(goble_128_loop)
> +        vmovups %ymm4, (%r10)
> +        vzeroupper
> +        vmovups %xmm8, -0x80(%r9)
> +        vmovups %xmm9, -0x70(%r9)
> +        vmovups %xmm10, -0x60(%r9)
> +        vmovups %xmm11, -0x50(%r9)
> +        vmovups %xmm12, -0x40(%r9)
> +        vmovups %xmm13, -0x30(%r9)
> +        vmovups %xmm14, -0x20(%r9)
> +        vmovups %xmm15, -0x10(%r9)
> +        ret
> +
> +L(gobble_data_movsb):
> +
> +#ifdef SHARED_CACHE_SIZE_HALF
> +        mov $SHARED_CACHE_SIZE_HALF, %rcx
> +#else
> +        mov __x86_64_shared_cache_size_half(%rip), %rcx
> +#endif
> +        shl $3, %rcx
> +
> +#ifdef USE_AS_MEMMOVE
> +        mov %rsi, %r10
> +        sub %rdi, %r10
> +        cmp %rdx, %r10
> +        jae L(memmove_use_memcpy_fwd)
> +        cmp %rcx, %r10
> +        jae L(memmove_use_memcpy_fwd)
> +        jmp L(gobble_mem_fwd_llc_start)
> +L(memmove_use_memcpy_fwd):
> +#endif
> +
> +        cmp %rcx, %rdx
> +        ja L(gobble_big_data_fwd)
> +
> +#ifdef USE_AS_MEMMOVE
> +L(gobble_mem_fwd_llc_start):
> +#endif
> +        mov %rdx, %rcx
> +        rep movsb
> +        ret
> +
> +L(gobble_big_data_fwd):
> +        vmovups (%rsi), %ymm4
> +        vmovups -0x80(%r8), %xmm5
> +        vmovups -0x70(%r8), %xmm6
> +        vmovups -0x60(%r8), %xmm7
> +        vmovups -0x50(%r8), %xmm8
> +        vmovups -0x40(%r8), %xmm9
> +        vmovups -0x30(%r8), %xmm10
> +        vmovups -0x20(%r8), %xmm11
> +        vmovups -0x10(%r8), %xmm12
> +        mov %rdi, %r8
> +        and $-32, %rdi
> +        add $32, %rdi
> +        mov %rdi, %r10
> +        sub %r8, %r10
> +        sub %r10, %rdx
> +        add %r10, %rsi
> +        sub $0x80, %rdx
> +L(gobble_mem_fwd_loop):
> +        prefetcht0 0x1c0(%rsi)
> +        prefetcht0 0x280(%rsi)
> +        vmovups (%rsi), %xmm0
> +        vmovups 0x10(%rsi), %xmm1
> +        vmovups 0x20(%rsi), %xmm2
> +        vmovups 0x30(%rsi), %xmm3
> +        vmovntdq %xmm0, (%rdi)
> +        vmovntdq %xmm1, 0x10(%rdi)
> +        vmovntdq %xmm2, 0x20(%rdi)
> +        vmovntdq %xmm3, 0x30(%rdi)
> +        vmovups 0x40(%rsi), %xmm0
> +        vmovups 0x50(%rsi), %xmm1
> +        vmovups 0x60(%rsi), %xmm2
> +        vmovups 0x70(%rsi), %xmm3
> +        lea 0x80(%rsi), %rsi
> +        vmovntdq %xmm0, 0x40(%rdi)
> +        vmovntdq %xmm1, 0x50(%rdi)
> +        vmovntdq %xmm2, 0x60(%rdi)
> +        vmovntdq %xmm3, 0x70(%rdi)
> +        lea 0x80(%rdi), %rdi
> +        sub $0x80, %rdx
> +        jae L(gobble_mem_fwd_loop)
> +        sfence
> +        vmovups %ymm4, (%r8)
> +        vzeroupper
> +        vmovups %xmm5, -0x80(%r9)
> +        vmovups %xmm6, -0x70(%r9)
> +        vmovups %xmm7, -0x60(%r9)
> +        vmovups %xmm8, -0x50(%r9)
> +        vmovups %xmm9, -0x40(%r9)
> +        vmovups %xmm10, -0x30(%r9)
> +        vmovups %xmm11, -0x20(%r9)
> +        vmovups %xmm12, -0x10(%r9)
> +        ret
> +
> +        ALIGN (4)
> +L(copy_backward):
> +#ifdef SHARED_CACHE_SIZE_HALF
> +        mov $SHARED_CACHE_SIZE_HALF, %rcx
> +#else
> +        mov __x86_64_shared_cache_size_half(%rip), %rcx
> +#endif
> +        shl $3, %rcx
> +        vmovups (%rsi), %xmm8
> +        vmovups 0x10(%rsi), %xmm9
> +        vmovups 0x20(%rsi), %xmm10
> +        vmovups 0x30(%rsi), %xmm11
> +        vmovups 0x40(%rsi), %xmm12
> +        vmovups 0x50(%rsi), %xmm13
> +        vmovups 0x60(%rsi), %xmm14
> +        vmovups 0x70(%rsi), %xmm15
> +        mov %rdi, %r9
> +        add %rdx, %rsi
> +        add %rdx, %rdi
> +        vmovups -0x20(%rsi), %ymm4
> +        lea -0x20(%rdi), %r10
> +        mov %rdi, %r11
> +        and $0x1f, %r11
> +        xor %r11, %rdi
> +        sub %r11, %rsi
> +        sub %r11, %rdx
> +#ifdef USE_AS_MEMMOVE
> +        mov %rdi, %r11
> +        sub %rsi, %r11
> +        cmp %rdx, %r11
> +        jae L(memmove_use_memcpy_bwd)
> +        cmp %rcx, %r11
> +        jae L(memmove_use_memcpy_bwd)
> +        jmp L(gobble_mem_bwd_llc_start)
> +#endif
> +L(memmove_use_memcpy_bwd):
> +        cmp %rcx, %rdx
> +        ja L(gobble_big_data_bwd)
> +L(gobble_mem_bwd_llc_start):
> +        sub $0x80, %rdx
> +L(gobble_mem_bwd_llc):
> +        vmovups -0x20(%rsi), %ymm0
> +        vmovups -0x40(%rsi), %ymm1
> +        vmovups -0x60(%rsi), %ymm2
> +        vmovups -0x80(%rsi), %ymm3
> +        lea -0x80(%rsi), %rsi
> +        vmovaps %ymm0, -0x20(%rdi)
> +        vmovaps %ymm1, -0x40(%rdi)
> +        vmovaps %ymm2, -0x60(%rdi)
> +        vmovaps %ymm3, -0x80(%rdi)
> +        lea -0x80(%rdi), %rdi
> +        sub $0x80, %rdx
> +        jae L(gobble_mem_bwd_llc)
> +        vmovups %ymm4, (%r10)
> +        vzeroupper
> +        vmovups %xmm8, (%r9)
> +        vmovups %xmm9, 0x10(%r9)
> +        vmovups %xmm10, 0x20(%r9)
> +        vmovups %xmm11, 0x30(%r9)
> +        vmovups %xmm12, 0x40(%r9)
> +        vmovups %xmm13, 0x50(%r9)
> +        vmovups %xmm14, 0x60(%r9)
> +        vmovups %xmm15, 0x70(%r9)
> +        ret
> +
> +L(gobble_big_data_bwd):
> +        sub $0x80, %rdx
> +L(gobble_mem_bwd_loop):
> +        prefetcht0 -0x1c0(%rsi)
> +        prefetcht0 -0x280(%rsi)
> +        vmovups -0x10(%rsi), %xmm0
> +        vmovups -0x20(%rsi), %xmm1
> +        vmovups -0x30(%rsi), %xmm2
> +        vmovups -0x40(%rsi), %xmm3
> +        vmovntdq %xmm0, -0x10(%rdi)
> +        vmovntdq %xmm1, -0x20(%rdi)
> +        vmovntdq %xmm2, -0x30(%rdi)
> +        vmovntdq %xmm3, -0x40(%rdi)
> +        vmovups -0x50(%rsi), %xmm0
> +        vmovups -0x60(%rsi), %xmm1
> +        vmovups -0x70(%rsi), %xmm2
> +        vmovups -0x80(%rsi), %xmm3
> +        lea -0x80(%rsi), %rsi
> +        vmovntdq %xmm0, -0x50(%rdi)
> +        vmovntdq %xmm1, -0x60(%rdi)
> +        vmovntdq %xmm2, -0x70(%rdi)
> +        vmovntdq %xmm3, -0x80(%rdi)
> +        lea -0x80(%rdi), %rdi
> +        sub $0x80, %rdx
> +        jae L(gobble_mem_bwd_loop)
> +        sfence
> +        vmovups %ymm4, (%r10)
> +        vzeroupper
> +        vmovups %xmm8, (%r9)
> +        vmovups %xmm9, 0x10(%r9)
> +        vmovups %xmm10, 0x20(%r9)
> +        vmovups %xmm11, 0x30(%r9)
> +        vmovups %xmm12, 0x40(%r9)
> +        vmovups %xmm13, 0x50(%r9)
> +        vmovups %xmm14, 0x60(%r9)
> +        vmovups %xmm15, 0x70(%r9)
> +        ret
> +END (MEMCPY)
> +#endif
> diff --git a/sysdeps/x86_64/multiarch/memmove-avx2-unaligned.S b/sysdeps/x86_64/multiarch/memmove-avx2-unaligned.S
> new file mode 100644
> index 0000000..ddb2090
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmove-avx2-unaligned.S
> @@ -0,0 +1,4 @@
> +#define USE_AS_MEMMOVE
> +#define MEMCPY __memmove_avx2_unaligned
> +#define MEMCPY_CHK __memmove_chk_avx2_unaligned
> +#include "memcpy-avx2-unaligned.S"
> diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx2-unaligned.S b/sysdeps/x86_64/multiarch/mempcpy-avx2-unaligned.S
> new file mode 100644
> index 0000000..a2f4af9
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/mempcpy-avx2-unaligned.S
> @@ -0,0 +1,4 @@
> +#define USE_AS_MEMPCPY
> +#define MEMCPY __mempcpy_avx2_unaligned
> +#define MEMCPY_CHK __mempcpy_chk_avx2_unaligned
> +#include "memcpy-avx2-unaligned.S"
> --
> 1.8.1.4
>
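The small-size paths in the quoted assembly (L(less_128bytes) down through L(less_2bytes)) all use the same trick: %r8 and %r9 point one byte past the end of the source and destination, so each size class loads a fixed-size block from the front and another from the back, then stores both. The two blocks may overlap in the middle, but because every load happens before any store, each byte still lands correctly, and no per-byte tail loop is needed. A minimal C sketch of that idea, not taken from the patch (the function name and the 32-to-64-byte range are chosen purely for illustration, and plain memcpy chunks stand in for vmovups):

    #include <string.h>
    #include <stddef.h>

    /* Illustrative only: copy n bytes where 32 <= n <= 64 using two
       fixed-size 32-byte chunks.  The front and back chunks may overlap;
       staging both in locals before storing keeps the result correct.  */
    static void
    copy_32_to_64 (void *dst, const void *src, size_t n)
    {
      unsigned char head[32], tail[32];

      memcpy (head, src, 32);                                  /* front block */
      memcpy (tail, (const unsigned char *) src + n - 32, 32); /* back block  */

      memcpy (dst, head, 32);
      memcpy ((unsigned char *) dst + n - 32, tail, 32);
    }

The assembly uses xmm/ymm registers as the staging area instead of a local buffer, which is how the patch covers every length inside a size class with a single straight-line path, leaving only the size-class compares at the top of MEMCPY.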
Attachment: results.tar.bz2 (BZip2 compressed data)
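For context on the ifunc-impl-list.c hunks: they only register the new __*_avx2_unaligned entry points under the HAS_AVX2 feature check so the multiarch machinery can enumerate and test them; the diff above does not touch the dispatch files that perform the actual run-time choice. As a rough standalone illustration of the same select-by-CPU-feature idea outside glibc, here is a sketch using GCC's ifunc attribute and __builtin_cpu_supports instead of glibc's internal HAS_AVX2 macro (all names below are made up for the example):

    #include <string.h>
    #include <stddef.h>

    /* Stand-ins for an AVX2-optimized and a baseline implementation.  */
    static void *memcpy_avx2_impl (void *dst, const void *src, size_t n)
    { return memcpy (dst, src, n); }

    static void *memcpy_baseline_impl (void *dst, const void *src, size_t n)
    { return memcpy (dst, src, n); }

    /* Resolver: runs once at load time and returns the implementation
       the dynamic linker should bind my_memcpy to.  */
    static void *(*resolve_my_memcpy (void)) (void *, const void *, size_t)
    {
      __builtin_cpu_init ();
      if (__builtin_cpu_supports ("avx2"))
        return memcpy_avx2_impl;
      return memcpy_baseline_impl;
    }

    void *my_memcpy (void *, const void *, size_t)
      __attribute__ ((ifunc ("resolve_my_memcpy")));

Callers simply use my_memcpy; the feature check happens once at relocation time rather than on every call, which is the same cost model the glibc multiarch dispatch relies on.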