This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 3/3] sparc: M7 optimized memcpy/mempcpy/memmove/memset/bzero.



On 27/09/2017 13:09, Patrick McGehearty wrote:
> Tested in sparcv9-*-* and sparc64-*-* targets in both multi and
> non-multi arch configurations.
>
> Support added to identify Sparc M7/T7/S7/M8/T8 processor capability.
> Usual "make check" correctness tests run with no regressions.
> Performance tests run on Sparc S7 using new code and old niagara4 code.
>
> Optimizations for memcpy also apply to mempcpy and memmove
> where they share code. Optimizations for memset also apply
> to bzero as they share code.
>
> For memcpy/mempcpy/memmove, performance comparison with niagara4 code:
> Long word aligned data
>   0-127 bytes - minimal changes
>   128-1023 bytes - 7-30% gain
>   1024+ bytes - 1-7% gain (in cache); 30-100% gain (out of cache)
> Word aligned data
>   0-127 bytes - 50%+ gain
>   128-1023 bytes - 10-200% gain
>   1024+ bytes - 0-15% gain (in cache); 5-50% gain (out of cache)
> Unaligned data
>   0-127 bytes - 0-70%+ gain
>   128-447 bytes - 40-80%+ gain
>   448-511 bytes - 1-3% loss
>   512-4096 bytes - 2-3% gain (in cache); 0-20% gain (out of cache)
>   4096+ bytes - +/- 3% (in cache); 20-50% gain (out of cache)
>
> For memset/bzero, performance comparison with niagara4 code:
> For memset nonzero data,
>   256-1023 bytes - 60-90% gain (in cache); 5% gain (out of cache)
>   1K+ bytes - 80-260% gain (in cache); 40-80% gain (out of cache)
> For memset zero data (and bzero),
>   256-1023 bytes - 80-120% gain (in cache), 0% gain (out of cache)
>   1024+ bytes - 2-4x gain (in cache), 10-35% gain (out of cache)
> ---
>  ChangeLog                                          |   20 +
>  sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile   |    3 +-
>  .../sparcv9/multiarch/memcpy-memmove-niagara7.S    |    2 +
>  sysdeps/sparc/sparc32/sparcv9/multiarch/memmove.S  |    2 +
>  .../sparc32/sparcv9/multiarch/memset-niagara7.S    |    2 +
>  .../sparc/sparc32/sparcv9/multiarch/rtld-memmove.c |    1 +
>  sysdeps/sparc/sparc64/multiarch/Makefile           |    3 +-
>  sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c  |   13 +
>  .../sparc64/multiarch/memcpy-memmove-niagara7.S    |  979 ++++++++++++++++++++
>  sysdeps/sparc/sparc64/multiarch/memcpy.S           |   28 +-
>  sysdeps/sparc/sparc64/multiarch/memmove.S          |   72 ++
>  sysdeps/sparc/sparc64/multiarch/memset-niagara7.S  |  334 +++++++
>  sysdeps/sparc/sparc64/multiarch/memset.S           |   28 +-
>  sysdeps/sparc/sparc64/multiarch/rtld-memmove.c     |    1 +
>  14 files changed, 1482 insertions(+), 6 deletions(-)
>  create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-memmove-niagara7.S
>  create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memmove.S
>  create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara7.S
>  create mode 100644 sysdeps/sparc/sparc32/sparcv9/multiarch/rtld-memmove.c
>  create mode 100644 sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S
>  create mode 100644 sysdeps/sparc/sparc64/multiarch/memmove.S
>  create mode 100644 sysdeps/sparc/sparc64/multiarch/memset-niagara7.S
>  create mode 100644 sysdeps/sparc/sparc64/multiarch/rtld-memmove.c

I would prefer if you split this patch in a memcpy/mempcpy/memmove and
a memset/bzero one since they are separated implementations.

I also noticed this whitespace issues, so you might want to check this out:

---

patch03:886: trailing whitespace.
 *      lines from memory. Use ST_CHUNK stores to first element of each cache 
patch03:890: space before tab in indent.
        andn    %o2, 0x3f, %o5          /* %o5 is multiple of block size  */
patch03:891: space before tab in indent.
        and     %o2, 0x3f, %o2          /* residue bytes in %o2  */
patch03:892: trailing whitespace.
 
patch03:1216: new blank line at EOF.
+
warning: 5 lines add whitespace errors.
---

> diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S
> index b6396ee..d72f4b1 100644
> --- a/sysdeps/sparc/sparc64/multiarch/memcpy.S
> +++ b/sysdeps/sparc/sparc64/multiarch/memcpy.S
> @@ -27,7 +27,19 @@ ENTRY(memcpy)
>  # ifdef SHARED
>  	SETUP_PIC_REG_LEAF(o3, o5)
>  # endif
> -	set	HWCAP_SPARC_CRYPTO, %o1
> +	set	HWCAP_SPARC_ADP, %o1
> +	andcc	%o0, %o1, %g0
> +	be	1f
> +	 nop
> +# ifdef SHARED
> +	sethi	%gdop_hix22(__memcpy_niagara7), %o1
> +	xor	%o1, %gdop_lox10(__memcpy_niagara7), %o1
> +# else
> +	set	__memcpy_niagara7, %o1
> +# endif
> +	ba	10f
> +	 nop
> +1:      set	HWCAP_SPARC_CRYPTO, %o1
>  	andcc	%o0, %o1, %g0
>  	be	1f
>  	 andcc	%o0, HWCAP_SPARC_N2, %g0
> @@ -89,7 +101,19 @@ ENTRY(__mempcpy)
>  # ifdef SHARED
>  	SETUP_PIC_REG_LEAF(o3, o5)
>  # endif
> -	set	HWCAP_SPARC_CRYPTO, %o1
> +	set     HWCAP_SPARC_ADP, %o1
> +	andcc   %o0, %o1, %g0
> +        be      1f
> +	 nop
> +# ifdef SHARED
> +	sethi	%gdop_hix22(__mempcpy_niagara7), %o1
> +	xor	%o1, %gdop_lox10(__mempcpy_niagara7), %o1
> +# else
> +	set	__mempcpy_niagara7, %o1
> +# endif
> +	ba      10f
> +	 nop
> +1:	set	HWCAP_SPARC_CRYPTO, %o1
>  	andcc	%o0, %o1, %g0
>  	be	1f
>  	 andcc	%o0, HWCAP_SPARC_N2, %g0

Since you are touching this file, I think it would be better to change the
ifunc selector to a C implementation by using the __ifunc/sparc_libc_ifunc
macros.


> diff --git a/sysdeps/sparc/sparc64/multiarch/memmove.S b/sysdeps/sparc/sparc64/multiarch/memmove.S
> new file mode 100644
> index 0000000..43bdf08
> --- /dev/null
> +++ b/sysdeps/sparc/sparc64/multiarch/memmove.S
> @@ -0,0 +1,72 @@
> +/* Multiple versions of memmove and bcopy
> +   All versions must be listed in ifunc-impl-list.c.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, write to the Free
> +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> +   02111-1307 USA.  */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +	.text
> +ENTRY(memmove)
> +	.type	memmove, @gnu_indirect_function
> +# ifdef SHARED
> +	SETUP_PIC_REG_LEAF(o3, o5)
> +# endif
> +	set	HWCAP_SPARC_ADP, %o1
> +	andcc	%o0, %o1, %g0
> +	be	1f
> +	 nop
> +# ifdef SHARED
> +	sethi	%gdop_hix22(__memmove_niagara7), %o1
> +	xor	%o1, %gdop_lox10(__memmove_niagara7), %o1
> +# else
> +	set	__memmove_niagara7, %o1
> +# endif
> +	ba 10f
> +	 nop
> +1:
> +# ifdef SHARED
> +	sethi   %gdop_hix22(__memmove_ultra1), %o1
> +	xor     %o1, %gdop_lox10(__memmove_ultra1), %o1
> +# else
> +	set     __memmove_ultra1, %o1
> +# endif
> +10:
> +# ifdef SHARED
> +	add	%o3, %o1, %o1
> +# endif
> +	retl
> +	 mov	%o1, %o0
> +END(memmove)
> +
> +libc_hidden_builtin_def (memmove)
> +
> +#undef libc_hidden_builtin_def
> +#define libc_hidden_builtin_def(name)
> +#undef weak_alias
> +#define weak_alias(x, y)
> +#undef libc_hidden_def
> +#define libc_hidden_def(name)
> +
> +#define memmove __memmove_ultra1
> +#define __memmove __memmove_ultra1
> +
> +#endif
> +
> +#include "../memmove.S"
> +

As before, I think new ifunc resolvers should be avoid to be coded in assembly.
Also there is an extra line on this file.

> diff --git a/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c b/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c
> new file mode 100644
> index 0000000..66fe118
> --- /dev/null
> +++ b/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c
> @@ -0,0 +1 @@
> +#include <../rtld-memmove.c>

Avoid using relative patch, include the default memmove using full paths.



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]