This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch] aarch64: Thunderx specific memcpy and memmove


On Saturday 25 March 2017 04:55 AM, Steve Ellcey wrote:
> If people think we should use the ThunderX version of memcpy for all
> aarch64 systems I am happy to drop this patch and create one that just
> changes memcpy.S to do the ThunderX style prefetches for all aarch64
> systems.

That could be done as a follow-up patch if we find that the ThunderX
version benefits all aarch64 systems.

The patch looks good to me with the formatting fixups I have specified
inline.

Siddhesh

> 2017-03-24  Steve Ellcey  <sellcey@caviumnetworks.com>
> 
> 	* sysdeps/aarch64/memcpy.S (MEMMOVE, MEMCPY): New macros.
> 	(memmove): Use MEMMOVE for name.
> 	(memcpy): Use MEMCPY for name.  Add loop with prefetching
> 	under USE_THUNDERX macro.
> 	* sysdeps/aarch64/multiarch/Makefile: New file.
> 	* sysdeps/aarch64/multiarch/ifunc-impl-list.c: Likewise.
> 	* sysdeps/aarch64/multiarch/init-arch.h: Likewise.
> 	* sysdeps/aarch64/multiarch/memcpy.c: Likewise.
> 	* sysdeps/aarch64/multiarch/memcpy_generic.S: Likewise.
> 	* sysdeps/aarch64/multiarch/memcpy_thunderx.S: Likewise.
> 	* sysdeps/aarch64/multiarch/memmove.c: Likewise.
> 
> 
> ifunc.patch
> 
> 
> diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
> index 29af8b1..74444b4 100644
> --- a/sysdeps/aarch64/memcpy.S
> +++ b/sysdeps/aarch64/memcpy.S
> @@ -59,7 +59,14 @@
>     Overlapping large forward memmoves use a loop that copies backwards.
>  */
>  
> -ENTRY_ALIGN (memmove, 6)
> +#ifndef MEMMOVE
> +#  define MEMMOVE memmove

Single char indent after the '#' (i.e. "# define MEMMOVE memmove").

> +#endif
> +#ifndef MEMCPY
> +#  define MEMCPY memcpy

Likewise.

> +#endif
> +
> +ENTRY_ALIGN (MEMMOVE, 6)
>  
>  	DELOUSE (0)
>  	DELOUSE (1)
> @@ -71,9 +78,9 @@ ENTRY_ALIGN (memmove, 6)
>  	b.lo	L(move_long)
>  
>  	/* Common case falls through into memcpy.  */
> -END (memmove)
> -libc_hidden_builtin_def (memmove)
> -ENTRY (memcpy)
> +END (MEMMOVE)
> +libc_hidden_builtin_def (MEMMOVE)
> +ENTRY (MEMCPY)
>  
>  	DELOUSE (0)
>  	DELOUSE (1)
> @@ -158,10 +165,22 @@ L(copy96):
>  
>  	.p2align 4
>  L(copy_long):
> +
> +#ifdef USE_THUNDERX
> +
> +	/* On thunderx, large memcpy's are helped by software prefetching.
> +	   This loop is identical to the one below it but with prefetching
> +	   instructions included.  For loops that are less than 32768 bytes,
> +	   the prefetching does not help and slows the code down so we only
> +	   use the prefetching loop for the largest memcpys.  */
> +
> +	cmp	count, #32768
> +	b.lo	L(copy_long_without_prefetch)
>  	and	tmp1, dstin, 15
>  	bic	dst, dstin, 15
>  	ldp	D_l, D_h, [src]
>  	sub	src, src, tmp1
> +	prfm	pldl1strm, [src, 384]
>  	add	count, count, tmp1	/* Count is now 16 too large.  */
>  	ldp	A_l, A_h, [src, 16]
>  	stp	D_l, D_h, [dstin]
> @@ -169,7 +188,10 @@ L(copy_long):
>  	ldp	C_l, C_h, [src, 48]
>  	ldp	D_l, D_h, [src, 64]!
>  	subs	count, count, 128 + 16	/* Test and readjust count.  */
> -	b.ls	2f
> +
> +L(prefetch_loop64):
> +	tbz	src, #6, 1f
> +	prfm	pldl1strm, [src, 512]
>  1:
>  	stp	A_l, A_h, [dst, 16]
>  	ldp	A_l, A_h, [src, 16]
> @@ -180,12 +202,40 @@ L(copy_long):
>  	stp	D_l, D_h, [dst, 64]!
>  	ldp	D_l, D_h, [src, 64]!
>  	subs	count, count, 64
> -	b.hi	1b
> +	b.hi	L(prefetch_loop64)
> +	b	L(last64)
> +
> +L(copy_long_without_prefetch):
> +#endif
> +
> +	and	tmp1, dstin, 15
> +	bic	dst, dstin, 15
> +	ldp	D_l, D_h, [src]
> +	sub	src, src, tmp1
> +	add	count, count, tmp1	/* Count is now 16 too large.  */
> +	ldp	A_l, A_h, [src, 16]
> +	stp	D_l, D_h, [dstin]
> +	ldp	B_l, B_h, [src, 32]
> +	ldp	C_l, C_h, [src, 48]
> +	ldp	D_l, D_h, [src, 64]!
> +	subs	count, count, 128 + 16	/* Test and readjust count.  */
> +	b.ls	L(last64)
> +L(loop64):
> +	stp	A_l, A_h, [dst, 16]
> +	ldp	A_l, A_h, [src, 16]
> +	stp	B_l, B_h, [dst, 32]
> +	ldp	B_l, B_h, [src, 32]
> +	stp	C_l, C_h, [dst, 48]
> +	ldp	C_l, C_h, [src, 48]
> +	stp	D_l, D_h, [dst, 64]!
> +	ldp	D_l, D_h, [src, 64]!
> +	subs	count, count, 64
> +	b.hi	L(loop64)
>  
>  	/* Write the last full set of 64 bytes.  The remainder is at most 64
>  	   bytes, so it is safe to always copy 64 bytes from the end even if
>  	   there is just 1 byte left.  */
> -2:
> +L(last64):
>  	ldp	E_l, E_h, [srcend, -64]
>  	stp	A_l, A_h, [dst, 16]
>  	ldp	A_l, A_h, [srcend, -48]
> @@ -256,5 +306,5 @@ L(move_long):
>  	stp	C_l, C_h, [dstin]
>  3:	ret
>  
> -END (memcpy)
> -libc_hidden_builtin_def (memcpy)
> +END (MEMCPY)
> +libc_hidden_builtin_def (MEMCPY)
> diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
> index e69de29..78d52c7 100644
> --- a/sysdeps/aarch64/multiarch/Makefile
> +++ b/sysdeps/aarch64/multiarch/Makefile
> @@ -0,0 +1,3 @@
> +ifeq ($(subdir),string)
> +sysdep_routines += memcpy_generic memcpy_thunderx
> +endif
> diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> index e69de29..c4f23df 100644
> --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> @@ -0,0 +1,51 @@
> +/* Enumerate available IFUNC implementations of a function.  AARCH64 version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <assert.h>
> +#include <string.h>
> +#include <wchar.h>
> +#include <ldsodefs.h>
> +#include <ifunc-impl-list.h>
> +#include <init-arch.h>
> +#include <stdio.h>
> +
> +/* Maximum number of IFUNC implementations.  */
> +#define MAX_IFUNC	2
> +
> +size_t
> +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> +			size_t max)
> +{
> +  assert (max >= MAX_IFUNC);
> +
> +  size_t i = 0;
> +
> +  INIT_ARCH ();
> +
> +  /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c.  */
> +  IFUNC_IMPL (i, name, memcpy,
> +	      IFUNC_IMPL_ADD (array, i, memcpy, IS_THUNDERX (midr),
> +			      __memcpy_thunderx)
> +	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
> +  IFUNC_IMPL (i, name, memmove,
> +	      IFUNC_IMPL_ADD (array, i, memmove, IS_THUNDERX (midr),
> +			      __memmove_thunderx)
> +	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
> +
> +  return i;
> +}
> diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
> index e69de29..e690e00 100644
> --- a/sysdeps/aarch64/multiarch/init-arch.h
> +++ b/sysdeps/aarch64/multiarch/init-arch.h
> @@ -0,0 +1,22 @@
> +/* This file is part of the GNU C Library.

Please add a one-line description of the file as the first line of this
comment.

> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <ldsodefs.h>
> +
> +#define INIT_ARCH()				\
> +  uint64_t __attribute__((unused)) midr =	\
> +    GLRO(dl_aarch64_cpu_features).midr_el1;
> diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
> index e69de29..4e3f251 100644
> --- a/sysdeps/aarch64/multiarch/memcpy.c
> +++ b/sysdeps/aarch64/multiarch/memcpy.c
> @@ -0,0 +1,39 @@
> +/* Multiple versions of memcpy. AARCH64 version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Define multiple versions only for the definition in libc.  */
> +
> +#if IS_IN (libc)
> +/* Redefine memcpy so that the compiler won't complain about the type
> +   mismatch with the IFUNC selector in strong_alias, below.  */
> +# undef memcpy
> +# define memcpy __redirect_memcpy
> +# include <string.h>
> +# include <init-arch.h>
> +
> +extern __typeof (__redirect_memcpy) __libc_memcpy;
> +
> +extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
> +extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
> +
> +libc_ifunc (__libc_memcpy,
> +            IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
> +
> +#undef memcpy

Single char indent after the '#' (i.e. "# undef memcpy"), since this is
still inside the #if.

> +strong_alias (__libc_memcpy, memcpy);
> +#endif
> diff --git a/sysdeps/aarch64/multiarch/memcpy_generic.S b/sysdeps/aarch64/multiarch/memcpy_generic.S
> index e69de29..50e1a1c 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_generic.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_generic.S
> @@ -0,0 +1,42 @@
> +/* A Generic Optimized memcpy implementation for AARCH64.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* The actual memcpy and memmove code is in ../memcpy.S.  If we are
> +   building libc this file defines __memcpy_generic and __memmove_generic.
> +   Otherwise the include of ../memcpy.S will define the normal __memcpy
> +   and __memmove entry points.  */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +
> +#define MEMCPY __memcpy_generic
> +#define MEMMOVE __memmove_generic
> +
> +/* Do not hide the generic versions of memcpy and memmove, we use them
> +   internally.  */
> +#undef libc_hidden_builtin_def
> +#define libc_hidden_builtin_def(name)
> +
> +/* It doesn't make sense to send libc-internal memcpy calls through a PLT. */
> +	.globl __GI_memcpy; __GI_memcpy = __memcpy_generic
> +	.globl __GI_memmove; __GI_memmove = __memmove_generic

Single char indent after the '#' for all macro defs inside the #if
(e.g. "# define MEMCPY __memcpy_generic").

> +
> +#endif
> +
> +#include "../memcpy.S"
> diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> index e69de29..ee971c8 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> @@ -0,0 +1,32 @@
> +/* A Thunderx Optimized memcpy implementation for AARCH64.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* The actual thunderx optimized code is in ../memcpy.S under the USE_THUNDERX
> +   ifdef.  If we are not building libc then we do not build anything when
> +   compiling this file and __memcpy is defined by memcpy_generic.S.  */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +
> +#define MEMCPY __memcpy_thunderx
> +#define MEMMOVE __memmove_thunderx
> +#define USE_THUNDERX
> +#include "../memcpy.S"

Single char indent after the '#' for all macro defs and the #include
inside the #if (e.g. "# define MEMCPY __memcpy_thunderx").

> +
> +#endif
> diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
> index e69de29..8d7a146 100644
> --- a/sysdeps/aarch64/multiarch/memmove.c
> +++ b/sysdeps/aarch64/multiarch/memmove.c
> @@ -0,0 +1,39 @@
> +/* Multiple versions of memmove. AARCH64 version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Define multiple versions only for the definition in libc.  */
> +
> +#if IS_IN (libc)
> +/* Redefine memmove so that the compiler won't complain about the type
> +   mismatch with the IFUNC selector in strong_alias, below.  */
> +# undef memmove
> +# define memmove __redirect_memmove
> +# include <string.h>
> +# include <init-arch.h>
> +
> +extern __typeof (__redirect_memmove) __libc_memmove;
> +
> +extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
> +extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
> +
> +libc_ifunc (__libc_memmove,
> +            IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
> +
> +#undef memmove

Single char indent after the '#' (i.e. "# undef memmove"), since this is
still inside the #if.

> +strong_alias (__libc_memmove, memmove);
> +#endif
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]