This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
powerpc32/fpu optimize space
- From: Alan Modra <amodra at bigpond dot net dot au>
- To: libc-alpha at sources dot redhat dot com
- Date: Tue, 17 May 2005 23:27:33 +0930
- Subject: powerpc32/fpu optimize space
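Optimizes the fp constants for space by storing them as 4-byte floats
rather than 8-byte doubles (lfs converts to double format on load, and
2**52, 0.0 and 0.5 are all exactly representable in single precision),
and by using one GOT slot for all constants loaded by a particular
function: the second constant is addressed as an offset from the first.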
2005-05-17 Alan Modra <amodra@bigpond.net.au>
* sysdeps/powerpc/powerpc32/fpu/s_ceil.S: Use float constants.
* sysdeps/powerpc/powerpc32/fpu/s_floor.S: Likewise.
* sysdeps/powerpc/powerpc32/fpu/s_rint.S: Likewise.
* sysdeps/powerpc/powerpc32/fpu/s_trunc.S: Likewise.
* sysdeps/powerpc/powerpc32/fpu/s_lround.S: Likewise. Use one GOT
slot to access both constants.
* sysdeps/powerpc/powerpc32/fpu/s_round.S: Likewise.
* sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Use one GOT slot to access
both constants; keep them together in a single .rodata.cst8 entry.
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_ceil.S libc2/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_ceil.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_ceil.S 2005-05-17 09:38:38.000000000 +0930
@@ -19,11 +19,10 @@
#include <sysdep.h>
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**52 */
- .long 0x43300000
- .long 0
+ .long 0x59800000
.section ".text"
ENTRY (__ceil)
@@ -34,10 +33,10 @@ ENTRY (__ceil)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsub fp12,fp13,fp13 /* generate 0.0 */
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_floor.S libc2/sysdeps/powerpc/powerpc32/fpu/s_floor.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_floor.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_floor.S 2005-05-17 09:38:17.000000000 +0930
@@ -19,11 +19,10 @@
#include <sysdep.h>
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**52 */
- .long 0x43300000
- .long 0
+ .long 0x59800000
.section ".text"
ENTRY (__floor)
@@ -34,10 +33,10 @@ ENTRY (__floor)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsub fp12,fp13,fp13 /* generate 0.0 */
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_lround.S libc2/sysdeps/powerpc/powerpc32/fpu/s_lround.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_lround.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_lround.S 2005-05-17 09:37:58.000000000 +0930
@@ -20,13 +20,11 @@
#include <sysdep.h>
.section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .align 2
.LC0: /* 0.0 */
.long 0x00000000
- .long 0
.LC1: /* 0.5 */
- .long 0x3fe00000
- .long 0
+ .long 0x3f000000
.section ".text"
@@ -47,17 +45,16 @@ ENTRY (__lround)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp12,0(r9)
+ lfs fp12,0(r9)
#else
lis r9,.LC0@ha
- lfd fp12,.LC0@l(r9)
+ lfs fp12,.LC0@l(r9)
#endif
#ifdef SHARED
- lwz r9,.LC1@got(10)
- lfd fp10,0(r9)
+ lfs fp10,.LC1-.LC0(r9)
#else
lis r9,.LC1@ha
- lfd fp10,.LC1@l(r9)
+ lfs fp10,.LC1@l(r9)
#endif
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
ble- cr6,.L4
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_rint.S libc2/sysdeps/powerpc/powerpc32/fpu/s_rint.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_rint.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_rint.S 2005-05-17 09:37:07.000000000 +0930
@@ -22,11 +22,10 @@
#include <sysdep.h>
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**52 */
- .long 0x43300000
- .long 0
+ .long 0x59800000
.section ".text"
ENTRY (__rint)
@@ -36,10 +35,10 @@ ENTRY (__rint)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsub fp12,fp13,fp13 /* generate 0.0 */
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_round.S libc2/sysdeps/powerpc/powerpc32/fpu/s_round.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_round.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_round.S 2005-05-17 09:36:50.000000000 +0930
@@ -20,13 +20,11 @@
#include <sysdep.h>
.section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .align 2
.LC0: /* 2**52 */
- .long 0x43300000
- .long 0
+ .long 0x59800000
.LC1: /* 0.5 */
- .long 0x3fe00000
- .long 0
+ .long 0x3f000000
/* double [fp1] round (double x [fp1])
IEEE 1003.1 round function. IEEE specifies "round to the nearest
@@ -47,10 +45,10 @@ ENTRY (__round)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsub fp12,fp13,fp13 /* generate 0.0 */
@@ -59,11 +57,10 @@ ENTRY (__round)
bnllr- cr7
mtfsfi 7,1 /* Set rounding mode toward 0. */
#ifdef SHARED
- lwz r9,.LC1@got(10)
- lfd fp10,0(r9)
+ lfs fp10,.LC1-.LC0(r9)
#else
lis r9,.LC1@ha
- lfd fp10,.LC1@l(r9)
+ lfs fp10,.LC1@l(r9)
#endif
ble- cr6,.L4
fadd fp1,fp1,fp10 /* x+= 0.5; */
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_roundf.S libc2/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_roundf.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_roundf.S 2005-05-17 09:36:30.000000000 +0930
@@ -19,8 +19,8 @@
#include <sysdep.h>
- .section .rodata.cst4,"aM",@progbits,4
- .align 2
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 2
.LC0: /* 2**23 */
.long 0x4b000000
.LC1: /* 0.5 */
@@ -57,8 +57,7 @@ ENTRY (__roundf )
bnllr- cr7
mtfsfi 7,1 /* Set rounding mode toward 0. */
#ifdef SHARED
- lwz r9,.LC1@got(10)
- lfs fp10,0(r9)
+ lfs fp10,.LC1-.LC0(r9)
#else
lis r9,.LC1@ha
lfs fp10,.LC1@l(r9)
diff -urp -xCVS libc1/sysdeps/powerpc/powerpc32/fpu/s_trunc.S libc2/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
--- libc1/sysdeps/powerpc/powerpc32/fpu/s_trunc.S 2005-05-16 22:00:59.000000000 +0930
+++ libc2/sysdeps/powerpc/powerpc32/fpu/s_trunc.S 2005-05-17 09:35:54.000000000 +0930
@@ -19,11 +19,10 @@
#include <sysdep.h>
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**52 */
- .long 0x43300000
- .long 0
+ .long 0x59800000
/* double [fp1] trunc (double x [fp1])
IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer
@@ -41,10 +40,10 @@ ENTRY (__trunc)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsub fp12,fp13,fp13 /* generate 0.0 */
--
Alan Modra
IBM OzLabs - Linux Technology Centre