This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] S390: Do not set FE_INEXACT with feraiseexcept (FE_OWERFLOW|FE_UNDERFLOW).


On 08/22/2016 12:59 PM, Joseph Myers wrote:
On Mon, 22 Aug 2016, Stefan Liebler wrote:

For feraiseexcept I think if the z196-round-instruction is available with used
toolchain then it should be used to avoid FE_INEXACT. Then it is the same
behaviour as on intel.
If it is not available feraiseexcept will use the add/div instructions as
before with the FE_INEXACT flag/exception. And the additional _FPU_GETCW/SETCW
usages are avoided.

What's your suggestion for feraiseexcept?

I don't have a suggestion; I was simply observing that extra steps for
this case (such as the x86 code does) are not actually needed to conform
to the standard.

Okay. Then I've updated the patch. Now it uses z196-round-instruction if available to omit FE_INEXACT. If it is not available the old behaviour is used without clearing FE_INEXACT.

Bye
Stefan

ChangeLog:

	* config.h.in: (HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT)
	New undefine.
	* sysdeps/s390/configure.ac: Add test for z196 zarch support.
	* sysdeps/s390/configure: Regenerated.
	* sysdeps/s390/fpu/fraiseexcpt.c (__feraiseexcept): Use ledbra
	instruction for raising over-/underflow if z196 zarch is
	supported by default.
	* sysdeps/s390/fpu/fsetexcptflg.c (fesetexceptflag):
	Correct comment.
commit 224637717d53581a081bbb42471c7e91fc137085
Author: Stefan Liebler <stli@linux.vnet.ibm.com>
Date:   Tue Aug 23 08:44:26 2016 +0200

    S390: Do not set FE_INEXACT with feraiseexcept (FE_OWERFLOW|FE_UNDERFLOW).
    
    On s390 feraiseexcept (FE_OVERFLOW|FE_UNDERFLOW) sets FE_INEXACT, too.
    This patch uses z196 zarch load rounded instruction which can suppress
    FE_INEXACT exception if gcc has z196 support in used configuration.
    Otherwise FE_INEXACT flag is set as before. The gcc support is tested
    in a new configure-check.
    
    A comment in fsetexcptflg.c is corrected as new exceptions are not
    executed with the next floating-point instruction if fpc is set with
    _FPU_SETCW macro. It seems the comment was copied e.g. from
    sysdeps/x86_64/fpu/fsetexcptflg.c file.
    
    ChangeLog:
    
    	* config.h.in: (HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT)
    	New undefine.
    	* sysdeps/s390/configure.ac: Add test for z196 zarch support.
    	* sysdeps/s390/configure: Regenerated.
    	* sysdeps/s390/fpu/fraiseexcpt.c (__feraiseexcept): Use ledbra
    	instruction for raising over-/underflow if z196 zarch is supported
    	by default.
    	* sysdeps/s390/fpu/fsetexcptflg.c (fesetexceptflag):
    	Correct comment.

diff --git a/config.h.in b/config.h.in
index 856ef6a..8cd08b0 100644
--- a/config.h.in
+++ b/config.h.in
@@ -70,6 +70,9 @@
 /* Define if assembler supports AVX512DQ.  */
 #undef  HAVE_AVX512DQ_ASM_SUPPORT
 
+/* Define if assembler supports z196 zarch instructions as default on S390.  */
+#undef  HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+
 /* Define if assembler supports vector instructions on S390.  */
 #undef  HAVE_S390_VX_ASM_SUPPORT
 
diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
index c9fb69c..347ac28 100644
--- a/sysdeps/s390/configure
+++ b/sysdeps/s390/configure
@@ -177,5 +177,41 @@ then
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for S390 z196 zarch instruction support as default" >&5
+$as_echo_n "checking for S390 z196 zarch instruction support as default... " >&6; }
+if ${libc_cv_asm_s390_min_z196_zarch+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.c <<\EOF
+float testinsn (double e)
+{
+    float d;
+    __asm__ ("ledbra %0,5,%1,4" : "=f" (d) : "f" (e) );
+    return d;
+}
+EOF
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+			-o conftest.o &> /dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; } ;
+then
+  libc_cv_asm_s390_min_z196_zarch=yes
+else
+  libc_cv_asm_s390_min_z196_zarch=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_s390_min_z196_zarch" >&5
+$as_echo "$libc_cv_asm_s390_min_z196_zarch" >&6; }
+
+if test "$libc_cv_asm_s390_min_z196_zarch" = yes ;
+then
+  $as_echo "#define HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT 1" >>confdefs.h
+
+fi
+
 test -n "$critic_missing" && as_fn_error $? "
 *** $critic_missing" "$LINENO" 5
diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
index 1db6d84..8a782e7 100644
--- a/sysdeps/s390/configure.ac
+++ b/sysdeps/s390/configure.ac
@@ -86,5 +86,31 @@ then
   AC_DEFINE(HAVE_S390_VX_GCC_SUPPORT)
 fi
 
+AC_CACHE_CHECK(for S390 z196 zarch instruction support as default,
+	       libc_cv_asm_s390_min_z196_zarch, [dnl
+cat > conftest.c <<\EOF
+float testinsn (double e)
+{
+    float d;
+    __asm__ ("ledbra %0,5,%1,4" : "=f" (d) : "f" (e) );
+    return d;
+}
+EOF
+dnl
+dnl test, if assembler supports S390 z196 zarch instructions as default
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+			-o conftest.o &> /dev/null]) ;
+then
+  libc_cv_asm_s390_min_z196_zarch=yes
+else
+  libc_cv_asm_s390_min_z196_zarch=no
+fi
+rm -f conftest* ])
+
+if test "$libc_cv_asm_s390_min_z196_zarch" = yes ;
+then
+  AC_DEFINE(HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT)
+fi
+
 test -n "$critic_missing" && AC_MSG_ERROR([
 *** $critic_missing])
diff --git a/sysdeps/s390/fpu/fraiseexcpt.c b/sysdeps/s390/fpu/fraiseexcpt.c
index 92a1a7d..ac6dfe7 100644
--- a/sysdeps/s390/fpu/fraiseexcpt.c
+++ b/sysdeps/s390/fpu/fraiseexcpt.c
@@ -35,6 +35,23 @@ fexceptadd (float d, float e)
   __asm__ __volatile__ ("aebr %0,%1" : : "f" (d), "f" (e) );
 }
 
+#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+static __inline__ void
+fexceptround (double e)
+{
+  float d;
+  /* Load rounded from double to float with M3 = round toward 0, M4 = Suppress
+     IEEE-inexact exception.
+     In case of e=0x1p128 and the overflow-mask bit is zero, only the
+     IEEE-overflow flag is set. If overflow-mask bit is one, DXC field is set to
+     0x20 "IEEE overflow, exact".
+     In case of e=0x1p-150 and the underflow-mask bit is zero, only the
+     IEEE-underflow flag is set. If underflow-mask bit is one, DXC field is set
+     to 0x10 "IEEE underflow, exact".
+     This instruction is available with a zarch machine >= z196.  */
+  __asm__ __volatile__ ("ledbra %0,5,%1,4" : "=f" (d) : "f" (e) );
+}
+#endif
 
 int
 __feraiseexcept (int excepts)
@@ -54,13 +71,29 @@ __feraiseexcept (int excepts)
 
   /* Next: overflow.  */
   if (FE_OVERFLOW & excepts)
-    /* I don't think we can do the same trick as intel so we will have
-       to live with inexact coming also.  */
-    fexceptadd (FLT_MAX, 1.0e32);
+    {
+#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+      fexceptround (0x1p128);
+#else
+      /* If overflow-mask bit is zero, both IEEE-overflow and IEEE-inexact flags
+	 are set.  If overflow-mask bit is one, DXC field is set to 0x2C "IEEE
+	 overflow, inexact and incremented".  */
+      fexceptadd (FLT_MAX, 1.0e32);
+#endif
+    }
 
   /* Next: underflow.  */
   if (FE_UNDERFLOW & excepts)
-    fexceptdiv (FLT_MIN, 3.0);
+    {
+#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
+      fexceptround (0x1p-150);
+#else
+      /* If underflow-mask bit is zero, both IEEE-underflow and IEEE-inexact
+	 flags are set.  If underflow-mask bit is one, DXC field is set to 0x1C
+	 "IEEE underflow, inexact and incremented".  */
+      fexceptdiv (FLT_MIN, 3.0);
+#endif
+    }
 
   /* Last: inexact.  */
   if (FE_INEXACT & excepts)
diff --git a/sysdeps/s390/fpu/fsetexcptflg.c b/sysdeps/s390/fpu/fsetexcptflg.c
index 25ade85..56a52c6 100644
--- a/sysdeps/s390/fpu/fsetexcptflg.c
+++ b/sysdeps/s390/fpu/fsetexcptflg.c
@@ -45,8 +45,7 @@ fesetexceptflag (const fexcept_t *flagp, int excepts)
     & newexcepts;
 
   /* Store the new status word (along with the rest of the environment.
-     Possibly new exceptions are set but they won't get executed unless
-     the next floating-point instruction.  */
+     Possibly new exceptions are set but they won't get executed.  */
   _FPU_SETCW (temp);
 
   /* Success.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]