This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 10/11] hppa: Add haszero.h and whichzero.h



On 17/12/2016 04:57, Richard Henderson wrote:

> +static inline unsigned long int
> +haszero(unsigned long int x)
> +{
> +#if __GNUC_PREREQ(4, 5)
> +  /* It's more useful to expose a control transfer to the compiler
> +     than to expose a proper boolean result.  */
> +  if (sizeof(x) == 8)
> +    asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
> +  else
> +    asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
> +  return 1;
> + nbz:
> +  return 0;
> +#else

Since current GLIBC requires GCC 4.7 as the minimum compiler, I think we
can get rid of the snippets for older compilers.  The same applies to the
other override functions.

> +  unsigned long int ret;
> +  if (sizeof(x) == 8)
> +    asm ("uxor,*sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
> +	 : "=r"(ret) : "r"(x), "0"(1));
> +  else
> +    asm ("uxor,sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
> +        : "=r"(ret) : "r"(x), "0"(1));
> +  return ret;
> +#endif
> +}
> +
> +/* Likewise, but for two words simultaneously.  */
> +
> +static inline unsigned long int
> +haszero2(unsigned long int x1, unsigned long int x2)
> +{
> +#if __GNUC_PREREQ(4, 5)
> +  /* It's more useful to expose a control transfer to the compiler
> +     than to expose a proper boolean result.  */
> +  if (sizeof(x1) == 8)
> +    asm goto ("uxor,*sbz %%r0,%0,%%r0\n\t"
> +	      "uxor,*nbz %%r0,%1,%%r0\n\t"
> +	      "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
> +  else
> +    asm goto ("uxor,sbz %%r0,%0,%%r0\n\t"
> +	      "uxor,nbz %%r0,%1,%%r0\n\t"
> +	      "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
> +  return 0;
> + sbz:
> +  return 1;
> +#else
> +  unsigned long int ret;
> +  if (sizeof(x1) == 8)
> +    asm ("uxor,*sbz %%r0,%1,%%r0\n\t"
> +	 "uxor,*nbz %%r0,%2,%%r0\n\t"
> +	 "ldi 1,%0"
> +	 : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
> +  else
> +    asm ("uxor,sbz %%r0,%1,%%r0\n\t"
> +	 "uxor,nbz %%r0,%2,%%r0\n\t"
> +	 "ldi 1,%0"
> +	 : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
> +  return ret;
> +#endif
> +}
> +
> +#endif /* haszero.h */
> diff --git a/sysdeps/hppa/whichzero.h b/sysdeps/hppa/whichzero.h
> new file mode 100644
> index 0000000..ef18cc7
> --- /dev/null
> +++ b/sysdeps/hppa/whichzero.h
> @@ -0,0 +1,70 @@
> +/* whichzero.h -- functions for zero byte searching.  HPPA version.
> +   Copyright (C) 2016 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef HPPA_WHICHZERO_H
> +#define HPPA_WHICHZERO_H 1
> +
> +/* Given a long that is known to contain a zero byte, return the
> +   index of the first such within the long in host memory order.  */
> +
> +static inline unsigned int
> +whichzero(unsigned long int x)
> +{
> +  unsigned int ret;
> +
> +  _Static_assert (sizeof(x) == 4, "64-bit not supported");
> +
> +  /* Since we have no clz insn, direct tests of the bytes is faster
> +     than loading up the constants to do the masking.  */
> +  asm ("extrw,u,<> %1,23,8,%%r0\n\t"
> +       "ldi 2,%0\n\t"
> +       "extrw,u,<> %1,15,8,%%r0\n\t"
> +       "ldi 1,%0\n\t"
> +       "extrw,u,<> %1,7,8,%%r0\n\t"
> +       "ldi 0,%0"
> +       : "=r"(ret) : "r"(x), "0"(3));
> +
> +  return ret;
> +}
> +
> +/* Similarly, but perform the test for two longs simultaneously.  */
> +
> +static inline unsigned int
> +whichzero2(unsigned long int x1, unsigned long int x2)
> +{
> +  unsigned int ret;
> +
> +  _Static_assert (sizeof(x1) == 4, "64-bit not supported");
> +
> +  /* Since we have no clz insn, direct tests of the bytes is faster
> +     than loading up the constants to do the masking.  */
> +  asm ("extrw,u,= %1,23,8,%%r0\n\t"
> +       "extrw,u,<> %2,23,8,%%r0\n\t"
> +       "ldi 2,%0\n\t"
> +       "extrw,u,= %1,15,8,%%r0\n\t"
> +       "extrw,u,<> %2,15,8,%%r0\n\t"
> +       "ldi 1,%0\n\t"
> +       "extrw,u,= %1,7,8,%%r0\n\t"
> +       "extrw,u,<> %2,7,8,%%r0\n\t"
> +       "ldi 0,%0"
> +       : "=r"(ret) : "r"(x1), "r"(x2), "0"(3));
> +
> +  return ret;
> +}
> +
> +#endif /* whichzero.h */

I am far from an hppa expert, but can't we code the same snippet in C?  How
bad would it be compared to this optimized asm?


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]