This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] S390: Do not set FE_INEXACT with feraiseexcept (FE_OWERFLOW|FE_UNDERFLOW).
On 08/22/2016 12:59 PM, Joseph Myers wrote:
On Mon, 22 Aug 2016, Stefan Liebler wrote:
For feraiseexcept I think if the z196-round-instruction is available with used
toolchain then it should be used to avoid FE_INEXACT. Then it is the same
behaviour as on intel.
If it is not available feraiseexcept will use the add/div instructions as
before with the FE_INEXACT flag/exception. And the additional _FPU_GETCW/SETCW
usages are avoided.
What's your suggestion for feraiseexcept?
I don't have a suggestion; I was simply observing that extra steps for
this case (such as the x86 code does) are not actually needed to conform
to the standard.
Okay. Then I've updated the patch. Now it uses z196-round-instruction if
available to omit FE_INEXACT. If it is not available the old behaviour
is used without clearing FE_INEXACT.
Bye
Stefan
ChangeLog:
* config.h.in: (HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT)
New undefine.
* sysdeps/s390/configure.ac: Add test for z196 zarch support.
* sysdeps/s390/configure: Regenerated.
* sysdeps/s390/fpu/fraiseexcpt.c (__feraiseexcept): Use ledbra
instruction for raising over-/underflow if z196 zarch is
supported by default.
* sysdeps/s390/fpu/fsetexcptflg.c (fesetexceptflag):
Correct comment.
commit 224637717d53581a081bbb42471c7e91fc137085
Author: Stefan Liebler <stli@linux.vnet.ibm.com>
Date: Tue Aug 23 08:44:26 2016 +0200
S390: Do not set FE_INEXACT with feraiseexcept (FE_OWERFLOW|FE_UNDERFLOW).
On s390 feraiseexcept (FE_OVERFLOW|FE_UNDERFLOW) sets FE_INEXACT, too.
This patch uses z196 zarch load rounded instruction which can suppress
FE_INEXACT exception if gcc has z196 support in used configuration.
Otherwise FE_INEXACT flag is set as before. The gcc support is tested
in a new configure-check.
A comment in fsetexcptflg.c is corrected as new exceptions are not
executed with the next floating-point instruction if fpc is set with
_FPU_SETCW macro. It seems the comment was copied e.g. from
sysdeps/x86_64/fpu/fsetexcptflg.c file.
ChangeLog:
* config.h.in: (HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT)
New undefine.
* sysdeps/s390/configure.ac: Add test for z196 zarch support.
* sysdeps/s390/configure: Regenerated.
* sysdeps/s390/fpu/fraiseexcpt.c (__feraiseexcept): Use ledbra
instruction for raising over-/underflow if z196 zarch is supported
by default.
* sysdeps/s390/fpu/fsetexcptflg.c (fesetexceptflag):
Correct comment.
diff --git a/config.h.in b/config.h.in
index 856ef6a..8cd08b0 100644
--- a/config.h.in
+++ b/config.h.in
@@ -70,6 +70,9 @@
/* Define if assembler supports AVX512DQ. */
#undef HAVE_AVX512DQ_ASM_SUPPORT
+/* Define if assembler supports z196 zarch instructions as default on S390. */
+#undef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+
/* Define if assembler supports vector instructions on S390. */
#undef HAVE_S390_VX_ASM_SUPPORT
diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
index c9fb69c..347ac28 100644
--- a/sysdeps/s390/configure
+++ b/sysdeps/s390/configure
@@ -177,5 +177,41 @@ then
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for S390 z196 zarch instruction support as default" >&5
+$as_echo_n "checking for S390 z196 zarch instruction support as default... " >&6; }
+if ${libc_cv_asm_s390_min_z196_zarch+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.c <<\EOF
+float testinsn (double e)
+{
+ float d;
+ __asm__ ("ledbra %0,5,%1,4" : "=f" (d) : "f" (e) );
+ return d;
+}
+EOF
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ -o conftest.o &> /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } ;
+then
+ libc_cv_asm_s390_min_z196_zarch=yes
+else
+ libc_cv_asm_s390_min_z196_zarch=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_s390_min_z196_zarch" >&5
+$as_echo "$libc_cv_asm_s390_min_z196_zarch" >&6; }
+
+if test "$libc_cv_asm_s390_min_z196_zarch" = yes ;
+then
+ $as_echo "#define HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT 1" >>confdefs.h
+
+fi
+
test -n "$critic_missing" && as_fn_error $? "
*** $critic_missing" "$LINENO" 5
diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
index 1db6d84..8a782e7 100644
--- a/sysdeps/s390/configure.ac
+++ b/sysdeps/s390/configure.ac
@@ -86,5 +86,31 @@ then
AC_DEFINE(HAVE_S390_VX_GCC_SUPPORT)
fi
+AC_CACHE_CHECK(for S390 z196 zarch instruction support as default,
+ libc_cv_asm_s390_min_z196_zarch, [dnl
+cat > conftest.c <<\EOF
+float testinsn (double e)
+{
+ float d;
+ __asm__ ("ledbra %0,5,%1,4" : "=f" (d) : "f" (e) );
+ return d;
+}
+EOF
+dnl
+dnl test, if assembler supports S390 z196 zarch instructions as default
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ -o conftest.o &> /dev/null]) ;
+then
+ libc_cv_asm_s390_min_z196_zarch=yes
+else
+ libc_cv_asm_s390_min_z196_zarch=no
+fi
+rm -f conftest* ])
+
+if test "$libc_cv_asm_s390_min_z196_zarch" = yes ;
+then
+ AC_DEFINE(HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT)
+fi
+
test -n "$critic_missing" && AC_MSG_ERROR([
*** $critic_missing])
diff --git a/sysdeps/s390/fpu/fraiseexcpt.c b/sysdeps/s390/fpu/fraiseexcpt.c
index 92a1a7d..ac6dfe7 100644
--- a/sysdeps/s390/fpu/fraiseexcpt.c
+++ b/sysdeps/s390/fpu/fraiseexcpt.c
@@ -35,6 +35,23 @@ fexceptadd (float d, float e)
__asm__ __volatile__ ("aebr %0,%1" : : "f" (d), "f" (e) );
}
+#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+static __inline__ void
+fexceptround (double e)
+{
+ float d;
+ /* Load rounded from double to float with M3 = round toward 0, M4 = Suppress
+ IEEE-inexact exception.
+ In case of e=0x1p128 and the overflow-mask bit is zero, only the
+ IEEE-overflow flag is set. If overflow-mask bit is one, DXC field is set to
+ 0x20 "IEEE overflow, exact".
+ In case of e=0x1p-150 and the underflow-mask bit is zero, only the
+ IEEE-underflow flag is set. If underflow-mask bit is one, DXC field is set
+ to 0x10 "IEEE underflow, exact".
+ This instruction is available with a zarch machine >= z196. */
+ __asm__ __volatile__ ("ledbra %0,5,%1,4" : "=f" (d) : "f" (e) );
+}
+#endif
int
__feraiseexcept (int excepts)
@@ -54,13 +71,29 @@ __feraiseexcept (int excepts)
/* Next: overflow. */
if (FE_OVERFLOW & excepts)
- /* I don't think we can do the same trick as intel so we will have
- to live with inexact coming also. */
- fexceptadd (FLT_MAX, 1.0e32);
+ {
+#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+ fexceptround (0x1p128);
+#else
+ /* If overflow-mask bit is zero, both IEEE-overflow and IEEE-inexact flags
+ are set. If overflow-mask bit is one, DXC field is set to 0x2C "IEEE
+ overflow, inexact and incremented". */
+ fexceptadd (FLT_MAX, 1.0e32);
+#endif
+ }
/* Next: underflow. */
if (FE_UNDERFLOW & excepts)
- fexceptdiv (FLT_MIN, 3.0);
+ {
+#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+ fexceptround (0x1p-150);
+#else
+ /* If underflow-mask bit is zero, both IEEE-underflow and IEEE-inexact
+ flags are set. If underflow-mask bit is one, DXC field is set to 0x1C
+ "IEEE underflow, inexact and incremented". */
+ fexceptdiv (FLT_MIN, 3.0);
+#endif
+ }
/* Last: inexact. */
if (FE_INEXACT & excepts)
diff --git a/sysdeps/s390/fpu/fsetexcptflg.c b/sysdeps/s390/fpu/fsetexcptflg.c
index 25ade85..56a52c6 100644
--- a/sysdeps/s390/fpu/fsetexcptflg.c
+++ b/sysdeps/s390/fpu/fsetexcptflg.c
@@ -45,8 +45,7 @@ fesetexceptflag (const fexcept_t *flagp, int excepts)
& newexcepts;
/* Store the new status word (along with the rest of the environment.
- Possibly new exceptions are set but they won't get executed unless
- the next floating-point instruction. */
+ Possibly new exceptions are set but they won't get executed. */
_FPU_SETCW (temp);
/* Success. */