This is the mail archive of the crossgcc@sources.redhat.com mailing list for the crossgcc project.
See the CrossGCC FAQ for lots more information.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi, I've made a patch for GCC using soft-float primitives for PPC 4xx taken from the IBM PowerPC Performance Libraries Project (http://www-124.ibm.com/developerworks/projects/ppcperflib/). The project is licensed under BSD License terms, so I don't know the chances of this patch being merged into the GCC project. It replaces the soft float emulation of GCC used for PPC targets. I did not run the GCC regression tests and I'm still working on this. But I did run some of the IEEE 754 floating-point tests (e.g. paranoia) on a GCC 3.3.2 crosstool-0.25 generated toolchain for 405 and everything seemed fine. I do not have access to a 440 right now, but I've modified the config.gcc file so it applies to both 405 and 440. You can expect an average 300% performance gain for float ops -- Xavier Miville xmiville@oerlikon.ca
diff -urN gcc-3.3.2/gcc/config/rs6000/4xx/fpeLib.inc gcc-3.3.2tempxav/gcc/config/rs6000/4xx/fpeLib.inc --- gcc-3.3.2/gcc/config/rs6000/4xx/fpeLib.inc 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2tempxav/gcc/config/rs6000/4xx/fpeLib.inc 2004-01-23 14:09:42.000000000 -0500 @@ -0,0 +1,61 @@ +/* fpopt/fpeLib.inc, pl_FPE_common, pl_linux 11/24/03 16:15:21 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#ifndef fpeLib_inc +#define fpeLib_inc + +/* Stack frame offsets. FP register area starts after back chain and LR save. */ +/* Space to save non-volatile registers 16-31 starts after FP register area. */ +/* Stack frame needs to be a multiple of 8 bytes for EABI */ + +#define NUMB_SAVEREGS 16 +#define STACKSIZE ((NUMB_SAVEREGS * 4) + 8) +#define SAVEREG_BASE 8 + +#define SAVEREG(rn) stw r##rn, (SAVEREG_BASE + (rn - 16) * 4)(r1) +#define RESTREG(rn) lwz r##rn, (SAVEREG_BASE + (rn - 16) * 4)(r1) + +#define cr6_sign 24 +#define cr6_inf 25 +#define cr6_zero 26 +#define cr6_NaN 27 + +#define cr7_sign 28 +#define cr7_inf 29 +#define cr7_zero 30 +#define cr7_NaN 31 + +#endif + diff -urN gcc-3.3.2/gcc/config/rs6000/4xx/lib1funcs_4xx.S gcc-3.3.2tempxav/gcc/config/rs6000/4xx/lib1funcs_4xx.S --- gcc-3.3.2/gcc/config/rs6000/4xx/lib1funcs_4xx.S 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2tempxav/gcc/config/rs6000/4xx/lib1funcs_4xx.S 2004-02-03 09:31:09.000000000 -0500 @@ -0,0 +1,4393 @@ + +#ifdef L__truncdfsf2 + +/* fpopt/ppc_dtof.S, pl_FPE_common, pl_linux 11/24/03 16:17:23 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. 
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: convert double floating point to single floating point */ +/* Input: r3,r4 */ +/* Output: r3 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. 
operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__truncdfsf2) + +/* use r11 to save CR */ + mfcr r11 + +/* r8 = exp r9 = hifrac r4 = lofrac */ + rlwinm r9,r3,3,0x007FFFF8 /* word[9:28] = FRS[12:31] */ + rlwimi r9,r4,3,0x00000007 /* word[29:31] = FRS[32:34] */ + rlwinm. r8,r3,12,0x000007FF /* extract exponent */ + rlwinm r5,r3,0,0x80000000 /* pick up sign */ + +/* exponent test */ + beq maybe_Zero /* if exponent=0 */ + cmpwi cr0,r8,896 + ble dN /* denormalized number */ + cmpwi cr0,r8,1150 + bgt special /* infinity or NaN or overflow */ + +reg_num: /* Normal numbers */ +/* do rounding, guard = bit3, round = bit4, sticky=bits5-31 of frs.lo */ + rlwinm r4,r4,0,0x1fffffff /* isolate grs and lower bits */ + lis r6,0x1000 /* guard bit mask */ + cmplw cr1,r4,r6 /* cr1 = guard bit set */ + blt cr1,noround /* no guard, so no round */ + andi. r0,r9,0x00000001 /* if frs.lobit==0 && t */ + crand cr0_2,cr0_2,cr1_2 /* no guard or sticky */ + beq noround + addic. r9,r9,1 /* round up */ +/* check for carry out from rounding */ + lis r6,0x0080 + cmpw cr0,r9,r6 + bne noround + addi r8,r8,1 /* increment exponent */ +noround: + addi r8,r8,-896 + rlwinm r3,r5,0,0x80000000 /* pick up sign in result */ + rlwimi r3,r8,23,0x7f800000 /* pick up exponent */ + rlwimi r3,r9,0,0x007fffff /* pick up rounded fraction */ + mtcr r11 /* restore condition register */ + blr + +maybe_Zero: + or. 
r0,r9,r4 /* test hi and low frac against 0 */ + bne dN /* no, so handle as denormal */ + mr r3,r9 + rlwimi r3,r5,0,0x80000000 /* move in saved sign */ + mtcr r11 /* restore condition register */ + blr + +dN: /* denormal */ +/* denormalized op here */ + addi r8,r8,-1023 /* exp = exp - 1023 */ +/* use reg 9, 4 as frac pair (only r9 contributes to final result) */ + rlwinm r9,r3,11,0x7FFFF800 /* r9[1:20] = r3[12:31] */ + rlwimi r9,r4,11,0x000007FF /* r9[21:31] = r4[0:10] */ + rlwinm. r4,r4,0,0x001fffff + cror cr2_2,cr0_2,cr0_2 /* set ~sticky flag for rounding later */ + oris r9,r9,0x8000 /* set hi order bit of r9 (implicit one)*/ + b test +loop: +/* shift frac right by 1, zero fill */ + andi. r0,r9,0x0001 /* losing precision */ + crand cr2_2,cr2_2,cr0_2 /* ~sticky bit flag */ + rlwinm r9,r9,31,0x7FFFFFFF /* shift hifrac right by 1, zero fill */ + addi r8,r8,1 /* increment exponent */ +test: + cmpwi cr0,r8,-126 + blt loop + +/* do rounding, guard = bit24, round = bit25, sticky=bits26-31 and frs.lo */ + rlwinm r7,r9,0,0x000000ff /* isolate grs and lower bits */ + li r6,0x0080 /* guard bit mask */ + cmplw cr1,r7,r6 /* cr1 = guard bit set */ + blt cr1,noround2 /* no guard, so no round */ + andi. r0,r9,0x00000100 /* if frs.lobit==0 && t */ + crand cr0_2,cr0_2,cr1_2 /* no guard or sticky */ + crand cr0_2,cr0_2,cr2_2 + beq noround2 + addic. r9,r9,0x00000100 /* round up */ +/* check for carry out from rounding */ + bge noround2 + oris r5,r5,0x0080 /* biased exponent = 1 */ + +noround2: + rlwimi r5,r9,24,0x007FFFFF /* word[9:31] = frac[1:23] */ + mr r3,r5 /* results register */ + mtcr r11 /* restore condition register */ + blr + +special: + cmpwi cr0,r8,0x07ff + beq inf_or_nan + /* rounding case force to infinity */ + oris r3,r5,0x7f80 /* combine sign and infinity */ + mtcr r11 /* restore condition register */ + blr + +inf_or_nan: + or. 
r0,r9,r4 /* test hi and low frac against 0 */ + bne nan + oris r3,r5,0x7f80 /* combine sign and infinity */ + mtcr r11 /* restore condition register */ + blr + +nan: + oris r3,r5,0x7fc0 /* or in QNAN value */ + mtcr r11 /* restore condition register */ + blr + +function_epilog(__truncdfsf2) + +#endif + +#ifdef L__fixdfsi + +/* fpopt/ppc_dtoi.S, pl_FPE_common, pl_linux 11/24/03 16:17:24 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: convert double floating point to 4-byte integer */ +/* Input: r3,r4 */ +/* Output: r3 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard. */ +/* 3. ISO/ANSI C requires that conversions to integers be rounded */ +/* toward zero (in short, leftover fraction bits are truncated). 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__fixdfsi) + +/* Save CR in r6 */ + mfcr r6 +/* Load up registers with source double */ +/* r8 = exp r9 = hifrac r10 = lofrac */ + rlwinm r9,r3,0,0x000FFFFF /* extract hifrac */ + rlwinm r8,r3,12,0x000007FF /* extract exponent */ +/* exponent test for Infinity or xNAN */ + cmpi cr0,0,r8,2047 /* test for Infinity or NAN */ + beq Infinity_or_NAN +/* exponent test for large operand */ + cmpi cr0,0,r8,1053 /* test for large operand */ + bgt Large_operand +/* Normal operand processing here */ +/* r9 is fraction, left justified */ + rlwinm r9,r9,10,0x3FFFFc00 /* r9[1:20] = r9[12:31] */ + rlwimi r9,r4,10,0x000003FF /* r9[21:31] = r4[0:10] */ + cmpi cr0,0,r8,0 /* test exp for 0 */ + addic. r8,r8,-1022 /* exp = exp - 1022 */ + beq Zero_exp + addic. r8,r8,-1 /* exp = exp - 1023 */ + oris r9,r9,0x4000 /* set hi order bit of r9 */ +Zero_exp: + blt Zeros /* fraction only, return 0 */ + subfic r0,r8,31 + or. r0,r0,r0 + mtctr r0 + b ltest +cloop: +/* shift frac right by 1, zero fill */ + rlwinm r9,r9,31,0x7FFFFFFF /* shift hifrac right by 1, zero fill */ +ltest: + bdnz cloop + +/* Form final result */ + or. r3,r3,r3 /* set CR for sign bit */ + bge fin_pos +/* sign is negative */ + nand r9,r9,r9 /* ones comp of r9 */ + addi r9,r9,1 /* now 2's complement */ +fin_pos: +/* check size of result */ + or. r3,r9,r9 /* check for zero and set return value */ + beq Zeros + +return_results: /* r3 contains signed integer result */ + mtcr r6 /* restore CR */ + blr + +/* Infinity or a NAN */ +Infinity_or_NAN: + or. r0,r9,r4 /* test rest for zero (infinity test) */ + bne NAN +/* Infinity if here (also, Large operand) */ +Large_operand: + or. 
r3,r3,r3 + blt Neg_infinity +/* form value for converted positive infinity */ + lis r3,0x7FFF + ori r3,r3,0xFFFF + b return_results +Neg_infinity: + lis r3,0x8000 + b return_results +NAN: + lis r3,0x8000 + b return_results +Zeros: + li r3,0 /* Store zero */ + b return_results + +function_epilog(__fixdfsi) + +#endif + +#ifdef L__fixunsdfsi + +/* fpopt/ppc_dtoui.S, pl_FPE_common, pl_linux 11/24/03 16:19:32 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: Convert double floating point to 4-byte unsigned integer. */ +/* Input: r3,r4(fpa) */ +/* Output: r3 */ +/* Notes: 1. A stack frame is created, following ABI specification, and */ +/* non-volatile registers are saved and restored on the stack. */ +/* 2. This is not a standard IEEE operation. */ +/* 3. Algorithm (Wtype = signed integer and Wtype_MIN=0x80000000) */ +/* if (fpa >= - (Wtype_MIN) */ +/* return (Wtype) (fpa + Wtype_MIN) - Wtype_MIN; */ +/* return (Wtype) a; */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__fixunsdfsi) + +/* For negative numbers, just convert to signed integer */ + or. r3,r3,r3 + blt __fixdfsi /* return directly from there */ + + rlwinm r8,r3,12,0x000007FF /* extract exponent */ +/* For numbers less than maximum positive signed integer, convert normally */ + cmpwi r8, 0x041e + blt __fixdfsi /* return directly form there */ + +/* Create stack frame, just to save link register! 
*/ + mflr r0 /* save link register in caller's stack */ + stw r0,4(r1) + stwu r1,-STACKSIZE(r1) /* set up stack frame to hold saved regs */ + +/* floating add the minimum signed integer value */ + lis r5,0xC1E0 + xor r6,r6,r6 + bl __adddf3 + +/* convert result to signed integer */ + bl __fixdfsi + +/* add minimum signed integer value */ + addis r3,r3,0x8000 + +/* restore link register, and return */ + lwz r1,0(r1) /* previous stack frame */ + lwz r0,4(r1) /* saved link register */ + mtlr r0 /* restore link register */ + blr /* return */ +function_epilog(__fixunsdfsi) + +#endif + +#ifdef L__adddf3 + +/* fpopt/ppc_fadd.S, pl_FPE_common, pl_linux 11/24/03 16:17:25 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: add two double floating point numbers. frt = fpa + fpb */ +/* Input: r3,r4(fpa) */ +/* r5,r6(fpb) */ +/* Output: r3,r4(frt) */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* 3. This file contains code common to both addition and subtraction. */ +/* 4. Eventually, the smaller magnitude argument (based on the high */ +/* word only) will be in <r9,r10>, and the larger in <r8, r7>. */ +/* GPRs <r9-r12> are used as a 128-bit register for developing the */ +/* sum of the fractions. The signs of the arguments are dumped */ +/* into the CR, and logical operations are used to decide if the */ +/* smaller argument should be subtracted. */ +/* CR4 is used to remember the sign of the "big" argument, and */ +/* whether the signs of the args were different. 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#define cr4_lt 16 +#define cr4_eq 18 +#define cr6_lt 24 +#define cr7_lt 28 + +#include "ppc4xx.inc" + +function_prolog(__adddf3) + + mr r7,r5 /* load B(high) */ +/* */ +/* from here on is common code */ +/* */ + .globl _ppc_fadd_common +_ppc_fadd_common: + mfcr r0 /* save cr */ + mtctr r0 + mr r8,r3 /* load A(high) */ + rlwinm r0,r8,12,0x7FF /* isolate exponent of A */ + cmpwi cr0,r0,0x7FF /* check A for INF/NaN */ + rlwinm r12,r7,12,0x7FF /* isolate exponent of B */ + cmpwi cr1,r12,0x7FF /* check B for INF/NaN */ + cmpwi cr6,r8,0 /* CR6_lt == fra sign bit */ + beq cr0,A_is_spec /* A is INF/NaN */ + cmpwi cr7,r7,0 /* CR7_lt == frb sign bit */ + beq cr1,B_is_spec /* B is INF/NaN */ +/* */ +/* Neither A nor B is INF or NaN. Signs are captured in CR6 & CR7. */ +/* */ + rlwinm r8,r8,0,0x7FFFFFFF /* strip signs */ + rlwinm r7,r7,0,0x7FFFFFFF /* strip signs */ + cmpw cr0,r8,r7 /* compare A(high) with B(high) */ + blt cr0,A_small /* A operand is smaller */ +/* */ + rlwinm r9,r7,0,0x000FFFFF /* high fraction into r9 */ + mr r10,r6 /* low fraction to r10 */ + cror cr4_lt,cr6_lt,cr6_lt /* sign of result is sign of A */ + rlwinm r6,r7,12,0x7FF /* exp(B) to r6 */ + mr r7,r4 /* low fraction of A to r7 */ + crxor cr4_eq,cr6_lt,cr7_lt /* true iff signs differ */ + rlwinm r5,r8,12,0x7FF /* exp(A) to r5 */ + rlwinm r8,r8,0,0x000FFFFF /* high fraction (A) to r8 */ + b ready /* jump to common code */ +/* */ +A_small: + rlwinm r9,r8,0,0x000FFFFF /* high fraction into r9 */ + mr r10,r4 /* low fraction to r10 */ + cror cr4_lt,cr7_lt,cr7_lt /* sign of result is sign of B */ + rlwinm r8,r7,0,0x000FFFFF /* high fraction (B) to r8 */ + rlwinm r5,r7,12,0x7FF /* exp(B) to r5 */ + crxor cr4_eq,cr6_lt,cr7_lt /* true iff signs differ */ + mr r7,r6 /* low fraction of B to r7 */ + ori r6,r0,0 /* copy exp(A) to r6 */ +/* */ +/* now things are set up */ +/* r5 = exp(big), r8 = high frac(big), r7 = low 
frac(big) */ +/* r6 = exp(small), r9 = high frac(small), r10 = low frac(small) */ +/* */ +ready: + cmpwi cr0,r6,0 /* check for denorm/Zero */ + cmpwi cr1,r5,0 + oris r8,r8,0x0010 /* materialize "hidden" bit */ + oris r9,r9,0x0010 /* materialize "hidden" bit */ + bne cr0,adone /* is small special? */ + xoris r9,r9,0x0010 /* yes. clear the leading bit */ + addi r6,r6,1 /* and correct exponent */ +adone: + xor r12,r12,r12 /* clear register */ + bne cr1,bdone /* is big special? */ + xoris r8,r8,0x0010 /* yes. clear the leading bit */ + addi r5,r5,1 /* and correct exponent */ +bdone: +/* */ +/* now pre-shift "small" so exponents match */ +/* */ + subfc. r0,r6,r5 /* get difference in exponents */ + cmpwi cr1,r0,54 /* check max pre-shift amount */ + cmpwi cr2,r0,32 /* */ + xor r11,r11,r11 /* clear register (wasted if pre-alignment needed) */ + beq cr0,aligned /* equal */ + rlwinm. r6,r0,0,0x1F /* get part-word shift amount */ + bgt cr1,ret_big /* difference > 53 - "big" is result */ + beq cr0,check_32 /* make sure some shift amount mod 32 */ + + subfic r12,r6,32 /* get complementary shift amount */ + slw r11,r10,r12 /* */ + srw r10,r10,r6 /* shift */ + slw r12,r9,r12 /* */ + or r10,r10,r12 + srw r9,r9,r6 /* */ + +check_32: + xor r12,r12,r12 /* clear register (again) */ + blt cr2,aligned /* shift at least 32 bits? */ + + ori r12,r11,0 /* move r11 to r12 */ + ori r11,r10,0 /* move r10 to r11 */ + ori r10,r9,0 /* move r9 to r10 */ + xor r9,r9,r9 /* zero r9 */ +/* */ +aligned: + beq cr4,do_sub /* do subtract if signs different */ + addc r10,r10,r7 /* add the fractions */ + adde r9,r9,r8 /* */ +/* */ + cntlzw r0,r9 /* check leading zero bits */ + cmpwi cr0,r0,11 /* */ + beq cr0,do_round /* just right. 
round to double */ + bgt cr0,do_sign /* unnormalized - must be zero or denorm */ +/* */ +right_1: + rlwinm r0,r11,0,0x1 /* get last bit */ + rlwinm r11,r11,31,0x7FFFFFFF /* right shift 1 */ + rlwimi r11,r10,31,0x80000000 /* */ + rlwinm r10,r10,31,0x7FFFFFFF /* right shift 1 */ + rlwimi r10,r9,31,0x80000000 /* */ + addi r5,r5,1 /* bump exponent */ + cmpli cr2,r5,0x7fe /* is exponent OK? */ + srwi r9,r9,1 /* */ + or r12,r12,r0 /* bag up last bit from r11 */ + bgt cr2,out_of_range /* final exp is > max */ +/* */ +/* If we get here, the exponent is known to be within range */ +/* Reason: "big" must have had a finite exponent, and the only */ +/* place the result exponent can be bumped is just above. */ +/* Further, potential underflow is being handled in the */ +/* unnormalized sum code. */ +/* */ +do_round: + addis r0,r0,0x8000 /* get word with only sign bit on */ + cmpl cr7,r11,r0 /* test bits to be discarded */ + cmpi cr6,r12,0 /* check low-order word */ + rlwimi r9,r5,20,0x7FF00000 /* insert final exponent */ + blt cr7,do_sign /* <1/2.. no change */ + addi r0,r0,1 /* get a 1 in R0 */ + bgt cr7,add_1 /* >1/2.. round up */ + bne cr6,add_1 /* > 1/2 */ + rlwinm r0,r10,0,0x1 /* get last bit of fraction */ +add_1: + addc r10,r10,r0 /* round. */ + addze r9,r9 /* carry. (carry into exp is correct) */ +/* */ +/* Common exit point. Final DP value is in r9.r10, except for the sign bit. */ +/* */ +do_sign: + mfcr r6 /* copy CR to r6 */ + rlwimi r9,r6,16,0x80000000 /* final sign bit is in CR4 lt (bit 16) */ +do_store: + mfctr r7 /* get saved CR */ + mtcr r7 /* restore saved CR */ + mr r3,r9 /* high word already in r3 */ + mr r4,r10 /* store low word */ + blr /* and return */ +/* */ +/* */ +ret_big: + mr r9,r8 /* just return "big" */ + mr r10,r7 + rlwimi r9,r5,20,0x7FF00000 /* jam in the exponent for big */ + b do_sign +/* */ +/* Here when signs differ and a subtract must be performed */ +/* */ +do_sub: + subfic r12,r12,0 /* yes. 
subtract "small" from "big" */ + subfze r11,r11 + subfe r10,r10,r7 + subfe. r9,r9,r8 + bge cr0,sign_ok /* was "small" actually > "big"? */ + /* NB: r11,r12 must be zero */ + /* and r9 becomes all zero */ + subfic r10,r10,0 + subfze r9,r9 + crnor cr4_lt,cr4_lt,cr4_lt /* flip sign of result */ + addi r7,0,32 /* load leading bit count */ + b unnorm_res /* must have at least 20 leading zero bits */ +/* */ +sign_ok: + or r0,r11,r12 /* first step in zero check */ + bgt cr0,normalize /* if > 0, then it's not Zero */ + or. r0,r0,r10 /* if r9 is zero, and all the others too */ + beq cr0,res_Zero /* result is exactly zero */ +/* */ +normalize: + cntlzw r7,r9 /* check leading zero bits */ + cmpwi cr0,r7,11 /* */ + ble cr0,do_round /* just right. round to double */ + /* (can only have diminished fraction) */ +/* */ +/* Here when the result (fraction) is unnormalized. */ +/* */ +unnorm_res: + cmpwi cr1,r7,32 /* see if first bit is in high word */ + cntlzw r0,r10 /* count leading bits in low */ + addi r7,r7,-11 /* get normalization shift amount */ + bne cr1,getexp /* is high == 0, then leading bit is in low */ + add r7,r7,r0 /* get true count */ +getexp: + subfc r5,r7,r5 /* get correct exponent */ + cmpwi cr2,r5,1 /* check for minimum */ + bge cr2,shift_ok /* OK? */ + add r7,r7,r5 /* No. add -ve exp to shift amount */ + addi r7,r7,-1 /* account for min exp == 1 */ + addi r5,0,0 /* new exp is minimum */ +/* */ +shift_ok: + cmpwi cr0,r7,32 /* at least whole word? */ + rlwinm r0,r7,0,0x1F /* compute part-word shift count */ + blt cr0,part_word /* is shift at least 1 word? */ + ori r9,r10,0 /* yes */ + ori r10,r11,0 + xor r11,r11,r11 /* zero r11 */ +part_word: /* r0 contains part-word norm shift count */ + cmpwi cr3,r7,1 /* was shift amount == 1? 
*/ + subfic r8,r0,32 /* get complementary amount */ + slw r9,r9,r0 /* shift high word */ + srw r6,r10,r8 + or r9,r9,r6 /* */ + slw r10,r10,r0 + srw r6,r11,r8 + or r10,r10,r6 + slw r11,r11,r0 /* shifted <r9, r10, r11> left by (r0) */ +/* */ + beq cr3,do_round /* norm count == 1. just do usual rounding */ + rlwimi r9,r5,20,0x7FF00000 /* insert final exponent */ + b do_sign /* there can be no bits to round */ +/* */ +/* */ +/* When result is exactly zero, there can be two reasons: */ +/* - the operands were identical, but of opposite signs, in */ +/* which case the result must be +0 */ +/* - the operands were both zero to begin with. x + x is required */ +/* have the same sign as x, so (+0) + (+0) => +0, and */ +/* (-0) + (-0) => -0, but */ +/* (+0) + (-0) => +0 */ +/* the lt bit of CR4 is the sign of the "big" operand, and the eq bit */ +/* is set on iff the operand signs were different. We must always return +0 */ +/* when the signs are different, and otherwise the sign of the "big" operand */ +/* */ +res_Zero: + crandc cr4_lt,cr4_lt,cr4_eq /* fiddle the sign of the result */ + b do_sign /* and finish up */ +/* */ +/* Here when final exponent before rounding is too big */ +/* */ +out_of_range: + addis r9,0,0x7FF0 /* return a INF */ + xor r10,r10,r10 /* */ + b do_sign /* with sign of "big" */ +/* */ +/* A is INF or NaN (B might be also) */ +/* */ +A_is_spec: + mr r10,r4 /* get low word of A */ + rlwinm r9,r8,0,0x000FFFFF /* get fraction (high) */ + or. r0,r9,r10 /* check for NaN */ + oris r9,r8,0x0008 /* assume A is a NaN. Quiesce it */ + bne cr0,do_store /* A is NaN. Just finish */ +/* */ + ori r9,r8,0 /* A is INF. Set up <r9,r10> with A */ + bne cr1,do_store /* all done if B is finite */ + mr r10,r6 /* get low part of B */ + /* doesn't hurt - either it's a NaN (low) */ + /* or it's zero like A (low) */ + rlwinm r11,r7,0,0x000FFFFF /* get B fraction (high) */ + cmpwi cr7,r7,0 /* CR7_lt == frb sign bit */ + or. r0,r11,r10 /* any non-zero bits? 
*/ +/* */ + xor r0,r7,r8 /* both A and B are INF. INF - INF => NaN */ + /* Assume that r10 == 0, and both r7 and r8 are INFs */ + /* so either result is same as A(INF) or is NaN (if */ + /* signs are different */ + rlwimi r9,r0,20,0x00080000 /* generate either INF or NaN from A (r9 set up earlier) */ + crand cr4_lt,cr6_lt,cr7_lt /* sign is sign of A if same, else + */ + beq cr0,do_sign /* if B was INF, we're done */ +/* */ + mr r9,r5 /* get high part of B */ + oris r9,r9,0x0008 /* rats! quiesce the NaN */ + b do_store /* finish up */ + +/* */ +/* here when B is either INF or NaN (but A is neither) */ +/* */ +B_is_spec: + mr r10,r6 /* get low part of B */ + rlwinm r11,r7,0,0x000FFFFF /* get B fraction (high) */ + or. r0,r11,r10 /* any non-zero bits? */ + beq B_is_inf + mr r9,r5 /* get high part of B */ + oris r9,r9,0x0008 /* quiesce NaN */ + b do_store + +B_is_inf: + ori r9,r7,0 /* B is INF */ + b do_store /* done */ + +function_epilog(__adddf3) + +#endif + +#ifdef L__addsf3 + +/* fpopt/ppc_fadds.S, pl_FPE_common, pl_linux 11/24/03 16:17:26 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: add two single floating point numbers. frt = fpa + fpb */ +/* Input: r3(fpa), r4(fpb) */ +/* Output: r3(frt) */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* 3. This file contains code common to both addition and subtraction. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +/* */ +#define SEXPMAX 255 /* Max value for single precision exponent */ +#define SEXPBIAS 127 /* Bias value for single precision exponent */ + +function_prolog(__addsf3) + +.globl _ppc_fadds_common + +/* save cr in r0 */ + mfcr r0 +/* load fpa into r8,r9,r10 and cr6. 
load fpb into r11, r12, r4 and cr7 */ + mr r9,r3 /* load fpa.S, fpa.exp, fpa.hi */ + mr r12,r4 /* load fpb.S, fpb.exp, fpb.hi */ + rlwinm r8,r9,9,0xff /* isolate exponent of fpa */ + +_ppc_fadds_common: /* common routine for fadds and fsubs */ + rlwinm r11,r12,9,0xff /* isolate exponent of fpb */ + cmpwi cr6,r9,0 /* set fpa.sign */ + cmpwi cr7,r12,0 /* set fpb.sign */ + rlwinm. r9,r9,0,0x007fffff /* isolate fpa.hi */ + cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = (fpa.hi == 0) */ + rlwinm. r12,r12,0,0x007fffff /* isolate fpb.hi */ + cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = (fpb.hi == 0) */ + cmpwi cr2,r8,SEXPMAX /* if (fpa.exp == SEXPMAX) */ + cmpwi cr0,r8,0 /* if (fpa.exp == 0) */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */ + crandc cr0_2,cr0_2,cr6_zero /* if (fpa.exp==0 && fpa!=0) */ + cmpwi cr3,r11,SEXPMAX /* if (fpb.exp == SEXPMAX) */ + beq denormal_a /* if fpa is not denormalized */ + oris r9,r9,0x0080 /* add in implicit 1. */ + b adone /* } else { */ +denormal_a: + addi r8,r8,1 /* fpa.exp++; */ +adone: /* } */ + crand cr7_inf,cr7_zero,cr1_2 /* fpb.inf=(fpb.exp==SEXPMAX && fpb==0) */ + crandc cr7_NaN,cr1_2,cr7_zero /* fpb.NaN=(fpb.exp==SEXPMAX && fpb!=0) */ + cmpwi cr0,r11,0 /* if (fpb.exp == 0) */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */ + crandc cr0_2,cr0_2,cr7_zero /* if (fpb.exp==0 && fpb!=0) */ + beq denormal_b /* { Add implied 1 to significand */ + oris r12,r12,0x0080 /* fpb.hi |= 0x00800000; */ + b bdone /* } else { */ +denormal_b: + addi r11,r11,1 /* fpb.exp++; */ +bdone: /* } */ + +/* check for Not-A-Number or INFinity */ + bt cr2_2,a_NaNorINF /* if (fpa.NaN||fpa.INF) goto a_NaNorINF; */ + bt cr3_2,b_NaNorINF /* if (fpb.NaN||fpb.INF) goto b_NaNorINF; */ + +/* check for a or b zero - if so, done */ + bt cr6_zero,a_zero /* if (fpa.zero) return fpb */ + bt cr7_zero,b_zero /* if (fpb.zero) return fpa */ + +/* if (fpa < 0) fpa = -fpa; */ + bf+ cr6_sign,a_positive /* if (fpa < 0) { */ + subfic r9,r9,0 /* fpa 
= -fpa; */ +a_positive: /* } */ +/* if (fpb < 0) fpb = -fpb; */ + bf+ cr7_sign,b_positive /* if (fpb < 0) { */ + subfic r12,12,0 /* fpb = -fpb; */ +b_positive: /* } */ + cmpw cr0,r11,r8 /* if (fpa.exp < fpb.exp ) */ + li r10,0 /* fpa.lo = 0 */ + li r4,0 /* fpb.lo = 0 */ + bgt shifta /* { */ +/* fpb.significand >>= (fpa.exp - fpb.exp); */ + subf. r11,r11,r8 /* shift = fpa.exp - fpb.exp; */ + cmpwi cr1,r11,27 /* if beyond precision */ + beq do_add /* if (shift == 0) goto do_add; */ + ble+ cr1,shiftless27 /* if (shift > 27) { */ + /* add in a (signed) rounding bit */ + bt cr7_sign,add_minus1 /* if (b > 0) { add in 1 */ + li r12,0 /* fpb.hi = 0; */ + li r4,1 /* fpb.lo = 1; */ + b do_add /* } */ +add_minus1: /* else { b < 0 : add in -1 */ + li r12,-1 /* fpb.hi = -1; */ + li r4,-1 /* fpb.lo = -1; */ + b do_add /* } */ + /* } */ +shiftless27: /* else */ + /* { */ + subfic r6,r11,32 /* r6 = 32-shift; */ + slw r4,r12,r6 /* fpb.lo = shift out from fpb.hi */ + sraw r12,r12,r11 /* (signed)fpb.hi >>= shift; */ + b do_add /* } */ + +shifta: /* else { */ +/* fpa.significand >>= (fpb.exp - fpa.exp); */ + mr r6,r11 /* save exponent */ + subf. r11,r8,r11 /* shift = fpb.exp - fpa.exp; */ + mr r8,r6 /* exponent */ + cmpwi cr1,r11,27 /* if beyond precision */ + ble+ cr1,Ashiftless27 /* if (shift > 27) { */ + /* add in a (signed) rounding bit */ + bt cr6_sign,Aadd_minus1 /* if (a > 0) { add in 1 */ + li r9,0 /* fpa.hi = 0; */ + li r10,1 /* fpa.lo = 1; */ + b do_add /* } */ +Aadd_minus1: /* else { a < 0 : add in -1 */ + li r9,-1 /* fpa.hi = -1; */ + li r10,-1 /* fpa.lo = -1; */ + b do_add /* } */ + /* } */ +Ashiftless27: /* else */ + /* { */ + subfic r6,r11,32 /* r6 = 32-shift; */ + slw r10,r9,r6 /* fpb.lo = shift out from fpb.hi */ + sraw r9,r9,r11 /* (signed)fpa.hi >>= shift; */ +do_add: /* } */ +/* fpa.significand += fpb.significand; */ + addc r10,r10,r4 /* fpa.lo += fpb.lo; */ + adde. 
r9,r9,r12 /* fpa.hi += fpb.hi + CA; */ + cror cr6_sign,cr0_0,cr0_0 /* fpa.sign = (fpa < 0); */ +/* if (fpa < 0) fpa = -fpa; */ + bge pos_result /* if (fpa < 0) { */ + subfic r10,r10,0 /* fpa = -fpa; */ + subfze r9,r9 +pos_result: /* } */ +/* normalize fpa */ + cntlzw r5,r9 /* s = cntlz(fpa.hi); */ + cmpwi cr0,r5,8 /* if (s < 8 ) */ + /* Note that first 8 bits of word */ + /* were 0 (where exponent and sign */ + /* were), so s can really only have */ + /* a min value of 7 . */ + bge noshrght /* { */ + subfic r5,r5,8 /* r5 = 8-s; */ + subfic r11,r5,32 /* r11 = 24+s; */ + srw r10,r10,r5 /* fpa.lo >>= (11-s); */ + slw r6,r9,r11 /* temp = fpa.hi << (21+s); */ + srw r9,r9,r5 /* fpa.hi >>= (11-s); */ + add r8,r5,r8 /* fpa.exp += (11-s); */ + or r10,r10,r6 /* fpa.lo |= temp; */ + b noshift /* } */ +noshrght: /* else if (s > 8) */ + beq noshift /* { */ + cmpwi cr0,r5,32 /* if (s < 32) */ + bge gt32 /* { */ + addi r5,r5,-8 /* r5 = s-8 ; */ + subf r8,r5,r8 /* fpa.exp -= (s-8); */ + slw r9,r9,r5 /* fpa.hi <<= (s-8); */ + subfic r11,r5,32 /* r11 = 46-s; */ + srw r6,r10,r11 /* temp == fpa.low >> (46-s); */ + slw r10,r10,r5 /* fpa.lo <<= (s-8); */ + or r9,r9,r6 /* fpa.hi |= temp; */ + b noshift /* } */ +gt32: /* else */ + /* { */ + cntlzw r5,r10 /* s = cntlz(fpa.lo); */ + cmpwi cr0,r5,32 /* if (s == 32) */ + blt+ not_zero /* { Return value of +0.0 */ + li r3,0 /* r3 = 0; */ + mtcr r0 /* restore cr */ + blr /* return; */ +not_zero: /* } */ + addi r6,r5,32-8 /* temp = s+(32-8 ); */ + subf r8,r6,r8 /* fpa.exp -= (s+(32-8)); */ + addic. 
r5,r5,-8 /* r5 = s-8 ; */ + ble+ sh32le /* if (shift > 8) { */ + slw r9,r10,r5 /* fpa.hi = fpa.lo << (s-8); */ + li r10,0 /* fpa.lo = 0; */ + b noshift /* } else { */ +sh32le: subfic r5,r5,0 /* r5 = 8-s; */ + srw r9,r10,r5 /* fpa.hi = fpa.lo >> (8-s); */ + subfic r11,r5,32 /* r11 = 32-(8-s); */ + slw r10,r10,r11 /* fpa.lo << 32-(8-s); */ + /* } */ + /* } */ +noshift: /* } */ +/* Check for single overflow */ + cmpwi cr0,r8,SEXPMAX /* if (fpa.exp > SEXPMAX) */ + bge res_INF /* Set results to INF */ +/* Check for number in denormalised range */ + cmpwi cr0,r8,1 /* else if (fpa.exp >= 1 */ + blt denormexp /* { */ +/* round fpa to nearest */ + lis r5,0x8000 /* guard bit */ + cmplw cr7,r10,r5 /* if ((round >= 0x80000000) || */ + blt cr7,noround /* */ + andi. r6,r9,0x00000001 /* (((fpa.lobit == 0) && */ + crand cr0_2,cr7_2,cr0_2 /* (round == 0x80000000))) */ + bt cr0_2,noround /* { */ + addi r9,r9,1 /* fpa.hi++; */ + lis r6,0x0100 /* */ + cmpw cr0,r9,r6 /* if (fpa.hi == 0x01000000) */ + bne+ noround /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ + /* } */ + b noround /* } */ +denormexp: /* else */ + /* { */ + subfic r12,r8,1 /* shift=1-fpa.exp; */ + li r8,0 /* fpa.exp = 0 */ + cmpwi cr0,r12,25 /* if ( shift < 25) */ + bge rnd2zero /* { */ + subfic r11,r12,32 /* r11 = 32-shift; */ + mtctr r0 /* (save r0) */ + slw. r0,r10,r11 /* round = fpa.lo << (32-shift); */ + srw r5,r10,r12 /* r5 = fpa.lo >> shift; */ + slw r6,r9,r11 /* temp = fpa.hi << (32-shift); */ + srw r4,r9,r12 /* r4 = fpa.hi >> shift; */ + or r5,r5,r6 /* r5 |= temp; */ + beq nostk /* if bits shifted out of fpa.lo */ + oris r5,r5,0x2000 /* set sticky bit; */ +nostk: + lis r6,0x8000 + cmplw cr1,r0,r6 /* if (round >= 0x80000000) guard set */ + blt cr1,nornd /* { */ + andi. 
r0,r5,0x00000001 /* if (((fpa.lobit==0) && */ + crand cr0_2,cr0_2,cr1_2 /* (round == 0x10000000)) */ + beq+ nornd /* goto nornd; */ + addi r4,r4,1 /* r4++; */ + lis r6,0x0080 + cmplw cr0,r4,r6 /* if (fpa.hi == 0x00800000) carry out */ + bne+ nornd /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ +nornd: /* } */ + mfctr r0 /* (restore r0) */ + mr r9,r4 + b noround /* } */ +rnd2zero: /* else */ + /* { */ + li r8,0 /* fpa.exp = 0; */ + li r9,0 /* fpa.hi = 0; */ + li r10,0 /* fpa.lo = 0; */ + /* } */ +noround: /* } */ + cmpwi cr0,r8,SEXPMAX /* if (fpa.exp > DEXPBIAS+SEXPMAX) */ + bgt res_INF /* Set results to INF */ +/* fpt = fpa; */ + rlwimi r9,r8,23,0x7f800000 /* fpa.hi |= fpa.exp << 23; */ + bf cr6_sign,nosign /* if (fpa.sign) { */ + oris r9,r9,0x8000 /* fpa.hi |= 0x80000000; */ +nosign: /* } */ + mr r3,r9 /* *FRT = fpa.hi; */ + mtcr r0 /* restore cr */ + blr /* return; */ + +a_zero: + mr r3,r4 /* copy fpbb into return reg */ + bf cr7_zero,a_zeroret /* if (fpb.zero) fixup sign */ + rlwinm r3,r3,0,0x7fffffff /* assume no sign */ + crand cr0_0,cr6_sign,cr7_sign /* if (fpa.sign AND fpb.sign */ + bf cr0,a_zeroret + oris r3,r3,0x8000 /* set sign bit */ + +a_zeroret: + mtcr r0 /* restore cr */ + blr /* return; */ + +b_zero: /* return fpa (already in r3) */ + mtcr r0 /* restore cr */ + blr /* return; */ + +a_NaNorINF: +/* fpa is either INF or NaN */ + mr r8,r3 /* r8 = FRA; */ + rlwinm. r5,r8,0,0x007fffff /* if (fpa.hi == 0 */ + beq a_INF /* goto a_INF; */ +a_NaN: /* return QNaN; */ + oris r3,r8,0x0040 /* FRA->hi |= 0x400000; */ + mtcr r0 /* restore cr */ + blr /* return; */ + +b_NaNorINF: +/* fpb is either INF or NaN */ + mr r8,r4 /* r8 = FRB; */ +/* The following is necessary because fsubs will have already flipped the bit */ + rlwinm r8,r8,0,0x7fffffff /* clear sign bit */ + bf cr7_sign,no_set_sign /* if (fpb.sign) */ + oris r8,r8,0x8000 /* set sign bit */ +no_set_sign: + rlwinm. 
r4,r8,0,0x007fffff /* if (fpb.hi == 0 */ + beq b_INF /* goto b_INF; */ +b_NaN: /* return QNaN; */ + oris r3,r8,0x0040 /* FRB->hi |= 0x400000; */ + mtcr r0 /* restore cr */ + blr /* return; */ + +a_INF: /* fpa is infinity */ + mr r8,r4 /* r8 = FRB */ + rlwinm. r11,r8,0,0x007fffff /* is FRB.fraction non-zero? */ + crandc cr7_NaN,cr3_2,cr0_2 /* if max exponent & non-zero fraction */ + bt cr7_NaN,b_NaN /* B is NAN */ + crxor cr0_0,cr6_sign,cr7_sign /* if (fpa.sign ^ fpb.sign && */ + crand cr0_0,cr0_0,cr3_2 /* fpb.inf) */ + bf cr0_0,rtn_INF /* { */ + li r3,0xffffffff /* return -QNAN; */ + mtcr r0 /* } */ + blr + +rtn_INF: + /* r3 = infinity */ + mtcr r0 /* restore cr */ + blr /* return; */ + +b_INF: /* fpb is infinity */ + mr r3,r8 /* *FRT = r8; */ + mtcr r0 /* restore cr */ + blr /* return; */ + +res_INF: /* result is infinity */ + lis r3,0x7f80 + bf cr6_sign,inf_pos /* if (fpa.sign) { */ + oris r3,r3,0x8000 /* fpa.hi |= 0x80000000; */ +inf_pos: /* } */ + mtcr r0 /* restore cr */ + blr /* return; */ + +function_epilog(__addsf3) + +#endif + +#if defined(L__gedf2) || defined(L__gtdf2) || defined(L__eqdf2) || defined(L__nedf2) || defined(L__ledf2) || defined(L__ltdf2) || defined(L__cmpdf2) + +/* fpopt/ppc_fcmpd.S, pl_FPE_common, pl_linux 11/24/03 16:17:28 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: compare two double floating point numbers. */ +/* Input: r3,r4(fra) */ +/* r5,r6(frb) */ +/* Output: r3, value as defined below */ +/* equal -> 0 */ +/* a > b -> 1 */ +/* a < b -> -1 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +#ifdef L__gedf2 +function_prolog(__gedf2) +#endif +#ifdef L__gtdf2 +function_prolog(__gtdf2) + li r8,-1 /* return value in NAN case */ + b start +#endif +#ifdef L__eqdf2 +function_prolog(__eqdf2) +#endif +#ifdef L__nedf2 +function_prolog(__nedf2) +#endif +#ifdef L__ledf2 +function_prolog(__ledf2) +#endif +#ifdef L__ltdf2 +function_prolog(__ltdf2) +#endif +#ifdef L__cmpdf2 +function_prolog(__cmpdf2) +#endif + li r8,1 /* return value in NAN case */ +start: + mfcr r11 /* save cr */ + rlwinm r9,r3,0,0x7FFFFFFF /* pull high word A, ignoring sign */ + rlwinm r10,r5,0,0x7FFFFFFF /* pull high word B, ignoring sign */ + cmpw cr1,r9,r10 /* compare high words */ + beq cr1,slowpath /* exception case */ +/* high words different here, cr1 has the right sense of comparison */ +/* for same sign + and opposite sense for same sign - . If signs are */ +/* different, magnitude of exponents doesn't matter (except to check the */ +/* largest one for a NAN */ + cmpwi cr2,r3,0 /* cr2[LT] set if sign is 1 */ + cmpwi cr3,r5,0 /* cr3[LT] set if sign is 1 */ + lis r7,0x7FF0 /* build comparison mask */ + blt cr1,FRA_LESS /* FRA is less */ +/* FRA greater here - check if for DEXPMAX */ + cmpw cr0,r9,r7 /* check FRA against DEXPMAX */ + bge slowpath /* NaN or Infinity (sort out later) */ + blt cr2,set_CR_LT /* FRA GT and neg (FRB sign immaterial) */ +/* FRA greater and positive here (sign of FRB immaterial) */ +set_CR_GT: + li r3,1 /* FRA > FRB */ + mtcr r11 /* restore cr */ + blr +FRA_LESS: +/* FRB greater here - check if for DEXPMAX */ + cmpw cr0,r10,r7 /* check against DEXPMAX */ + bge slowpath /* NaN or Infinity (sort out later) */ + blt cr3,set_CR_GT /* FRB greater and negative */ +/* FRB greater and positive here (sign of FRA immaterial) */ +set_CR_LT: + li r3,-1 /* FRA < FRB */ + mtcr r11 /* restore cr */ + blr +/* need to sort out more detail if 
here */ +slowpath: +/* Do NAN tests */ + lis r12,0x7FF0 /* Build comparison mask */ + cmpw cr0,r9,r12 + blt FRB_NAN + rlwinm r7,r3,0,0x000FFFFF /* Check for non zero fraction */ + or. r7,r7,r4 + bne set_CR_SO /* exponent is DEXPMAX and lofrac != 0 */ +FRB_NAN: + cmpw cr0,r10,r12 + blt sign_test + rlwinm r7,r5,0,0x000FFFFF /* Check for non zero fraction */ + or. r7,r7,r6 /* set condition for zero test */ + bne set_CR_SO /* exponent is DEXPMAX and lofrac != 0 */ +sign_test: + xor. r12,r3,r5 /* msb will be 1 if signs are different */ + bnl same_sign +/* if signs different special test for -0 = + 0 */ + or. r12,r4,r6 /* OR loFractions */ + bne lsk0 + or r12,r3,r5 /* OR high words of FRA and FRB */ + rlwinm. r12,r12,0,0x7FFFFFFF /* ignore sign bit */ + beq set_CR_EQ +lsk0: + or. r3,r3,r3 /* set condition */ + blt set_CR_LT + b set_CR_GT +same_sign: +/* clear signs and compare */ + cmpw cr1,r9,r10 + or. r3,r3,r3 /* Sets cr0[LT] to indicate sign */ + beq cr1,same_hifrac + blt cr1,cond_CR_LT /* conditionally less than (sign?) */ + b cond_CR_GT +same_hifrac: + cmplw cr1,r4,r6 /* compare low fractions */ + beq cr1,set_CR_EQ + blt cr1,cond_CR_LT /* conditionally less than (sign?) 
*/ +cond_CR_GT: + bnl set_CR_GT + b set_CR_LT +cond_CR_LT: + bnl set_CR_LT /* ruling stands */ + b set_CR_GT +set_CR_EQ: + li r3,0 + b set_cc +set_CR_SO: /* unordered case */ + mr r3,r8 +set_cc: + mtcr r11 + blr + +#ifdef L__eqdf2 +function_epilog(__eqdf2) +#endif +#ifdef L__nedf2 +function_epilog(__nedf2) +#endif +#ifdef L__ledf2 +function_epilog(__ledf2) +#endif +#ifdef L__ltdf2 +function_epilog(__ltdf2) +#endif +#ifdef L__gedf2 +function_epilog(__gedf2) +#endif +#ifdef L__gtdf2 +function_epilog(__gtdf2) +#endif +#ifdef L__cmpdf2 +function_epilog(__cmpdf2) +#endif + +#endif + +#if defined(L__gesf2) || defined(L__gtsf2) || defined(L__eqsf2) || defined(L__nesf2) || defined(L__lesf2) || defined(L__ltsf2) || defined(L__cmpsf2) + +/* fpopt/ppc_fcmps.S, pl_FPE_common, pl_linux 11/24/03 16:17:29 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: compare two single floating point numbers. */ +/* Input: r3(fra), r4(frb) */ +/* Output: r3, value as defined below */ +/* equal -> 0 */ +/* a > b -> 1 */ +/* a < b -> -1 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +#ifdef L__gesf2 +function_prolog(__gesf2) +#endif +#ifdef L__gtsf2 +function_prolog(__gtsf2) + li r8,-1 /* return value in NAN case */ + b start +#endif +#ifdef L__eqsf2 +function_prolog(__eqsf2) +#endif +#ifdef L__nesf2 +function_prolog(__nesf2) +#endif +#ifdef L__lesf2 +function_prolog(__lesf2) +#endif +#ifdef L__ltsf2 +function_prolog(__ltsf2) +#endif +#ifdef L__cmpsf2 +function_prolog(__cmpsf2) +#endif + li r8,1 /* return value in NAN case */ +start: + mfcr r11 /* save cr */ + rlwinm r9,r3,0,0x7FFFFFFF /* pull FRA, ignoring sign */ + rlwinm r10,r4,0,0x7FFFFFFF /* pull FRB, ignoring sign */ + cmpw cr1,r9,r10 /* compare FRA,FRB */ + beq cr1,slowpath /* exception case */ +/* high words different here, cr1 has the right sense of comparison */ +/* for same sign + and opposite sense for same sign - . If signs are */ +/* different, magnitude of exponents doesn't matter (except to check the */ +/* largest one for a NAN */ + cmpwi cr2,r3,0 /* cr2[LT] set if sign is 1 */ + cmpwi cr3,r4,0 /* cr3[LT] set if sign is 1 */ + lis r7,0x7F80 /* build comparison mask */ + blt cr1,FRA_LESS /* FRA is less */ +/* FRA greater here - check if for DEXPMAX */ + cmpw cr0,r9,r7 /* check FRA against DEXPMAX */ + bge slowpath /* NaN or Infinity (sort out later) */ + blt cr2,set_CR_LT /* FRA GT and neg (FRB sign immaterial) */ +/* FRA greater and positive here (sign of FRB immaterial) */ +set_CR_GT: + li r3,1 /* FRA > FRB */ + mtcr r11 /* restore cr */ + blr +FRA_LESS: +/* FRB greater here - check if for DEXPMAX */ + cmpw cr0,r10,r7 /* check against DEXPMAX */ + bge slowpath /* NaN or Infinity (sort out later) */ + blt cr3,set_CR_GT /* FRB greater and negative */ +/* FRB greater and positive here (sign of FRA immaterial) */ +set_CR_LT: + li r3,-1 /* FRA < FRB */ + mtcr r11 /* restore cr */ + blr +/* need to sort out more detail if here */ +slowpath: 
+/* Do NAN tests */ + lis r12,0x7F80 /* Build comparison mask */ + cmpw cr0,r9,r12 + blt FRB_NAN + rlwinm. r7,r3,0,0x007FFFFF /* Check for non zero fraction */ + bne set_CR_SO /* exponent is DEXPMAX and lofrac != 0 */ +FRB_NAN: + cmpw cr0,r10,r12 + blt sign_test + rlwinm. r7,r4,0,0x007FFFFF /* Check for non zero fraction */ + bne set_CR_SO /* exponent is DEXPMAX and lofrac != 0 */ +sign_test: + xor. r12,r3,r4 /* msb will be 1 if signs are different */ + bnl same_sign +/* if signs different special test for -0 = + 0 */ + or. r12,r9,r10 /* test for FRA=FRB=0 (ignoring sign) */ + beq set_CR_EQ + + or. r3,r3,r3 /* set condition based on sign */ + blt set_CR_LT + b set_CR_GT +same_sign: +/* clear signs and compare */ + cmpw cr1,r9,r10 + or. r3,r3,r3 /* Sets cr0[LT] to indicate sign */ + beq cr1,set_CR_EQ + blt cr1,cond_CR_LT /* conditionally less than (sign?) */ +cond_CR_GT: + bnl set_CR_GT + b set_CR_LT +cond_CR_LT: + bnl set_CR_LT /* ruling stands */ + b set_CR_GT +set_CR_EQ: + li r3,0 + b set_cc +set_CR_SO: + mr r3,r8 /* unordered case */ +set_cc: + mtcr r11 + blr + +#ifdef L__eqsf2 +function_epilog(__eqsf2) +#endif +#ifdef L__nesf2 +function_epilog(__nesf2) +#endif +#ifdef L__lesf2 +function_epilog(__lesf2) +#endif +#ifdef L__ltsf2 +function_epilog(__ltsf2) +#endif +#ifdef L__gesf2 +function_epilog(__gesf2) +#endif +#ifdef L__gtsf2 +function_epilog(__gtsf2) +#endif +#ifdef L__cmpsf2 +function_epilog(__cmpsf2) +#endif + +#endif + +#ifdef L__divdf3 + +/* fpopt/ppc_fdiv.S, pl_FPE_common, pl_linux 11/24/03 16:17:31 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. 
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: divide two double floating point values. frt = fpa / fpb */ +/* Input: r3,r4(fpa) */ +/* r5,r6(fpb) */ +/* Output: r3,r4(frt) */ +/* Notes: 1. A stack frame is created, following ABI specification, and */ +/* non-volatile registers are saved and restored on the stack. */ +/* 2. 
operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +#define first_loop 4 + +function_prolog(__divdf3) + + mflr r0 /* save link register in caller's stack */ + stw r0,4(r1) + stwu r1,-STACKSIZE(r1) /* set up stack frame to hold saved regs */ + + SAVEREG(30) + SAVEREG(31) + mfcr r0 /* save cr */ +/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa into r8 */ + rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb into r11 */ + mr r10,r4 /* load fpa.lo */ + cmpwi cr6,r9,0 /* set fpa.sign */ + /* load fpb.lo (already in r6)*/ + cmpwi cr7,r12,0 /* set fpb.sign */ +/* test for normal: */ + cmpwi cr0,r8,0x7ff /* if (fpa.exp == DEXPMAX) */ + cmpwi cr1,r8,0 /* if (fpa.exp == 0) */ + cmpwi cr2,r11,0x7ff /* if (fpb.exp == DEXPMAX) */ + cmpwi cr3,r11,0 /* if (fpb.exp == 0) */ + beq full + beq cr1,full + beq cr2,full + beq cr3,full + +/* normal case only: */ + oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */ + oris r12,r12,0x0010 /* fpb.hi |= 0x00100000; */ +/* Calculate exponent */ + subf r8,r11,r8 /* result.exp = fpa.exp - fpb.exp; */ + /* ------- note: r11 is now free */ + addic. 
r8,r8,+1023 /* result.exp += DEXPBIAS; check for negative */ + /* result.exp += 1 (for dividing left-just */ + /* numbers if result is left-just) */ + cmpwi cr2,r8,0x7ff /* if (result.exp >= DEXPMAX) */ + ble full + bge cr2, full /* cr2 */ +/* Zero out the non-number part of the operands: */ +/* for the sign bit plus one working bit: */ + rlwinm r9,r9,0,11,31 /* r9 = fpa.hi */ + /* r10 = fpa.lo */ + + rlwinm r12,r12,0,11,31 /* r12 = fpb.hi */ + /* r6 = fpb.lo */ + +/* Divide the numbers (54 bits of long division); */ +/* r7/r11 hold the intermediate differences. */ + li r7,54 /* calculate 53 signif + 1 sticky */ + mtctr r7 +longdivloop: +/* r7/r11 = r9/r10 - r12/r6: */ + subfc r11,r6,r10 + subfe. r7,r12,r9 + adde r31,r31,r31 /* rot result left and put CA into LSb of result */ + adde r30,r30,r30 /* rotate r30/r31 left 1 bit */ + blt+ zerobit + /* if intermed diff positive then: */ + addc r10,r11,r11 /* r9/r10 get r7/r11 << 1 */ + adde r9,r7,r7 /* */ + bdnz longdivloop + b done54 + +zerobit: /* else: */ + addc r10,r10,r10 /* r9/r10 get r9/r10 << 1 */ + adde r9,r9,r9 /* */ + bdnz longdivloop +done54: + +/* Since we divided one left-justified number by another left-justified */ +/* number, there are two possibilities for the result: */ +/* either (the msb is 1) or */ +/* (the msb is 0 and the next bit is 1). If the msb is 0, then we need to */ +/* calculate one more bit (to make 54 significant bits). */ +/* NOTE: the msb is now in bit 10 (=64-54) of result.hi=r30, because we have */ +/* calculated 54 bits. */ + rlwinm. r7,r30,0,10,10 /* test hi-bit = bit 10 of result.hi */ + bne+ highbit_is_1 + addic. r8,r8,-1 /* shifted case: exponent is 1 less than */ + /* non-shifted. */ + beq full /* exponent=0 is special case */ + addi r7,0,1 + mtctr r7 + b longdivloop +highbit_is_1: + +/* We now have the first 54 bits of the result. Bits beyond 53 are only */ +/* needed to determine rounding. If bit 54 (the last one just calculated) */ +/* is 0, then we don't round. 
If it is 1, then we'll calculate up to 12 more */ +/* bits, looking for a 1. The first time we see a 1, we know to round up. */ +/* But even if all 12 are 0, we still don't know for sure to not round, */ +/* so we punt to the full routine. */ + rlwinm r7,r31,0,30,31 + cmpwi r7,1 + bne roundup +/* At this point the LSb is 0 and the sticky bit is 1; we need more bits */ +/* to determine rounding -- if any subsequent bit is 1, then we round up. */ +/* We'll try 12 more bits before giving up: */ + addi r7,0,12 + mtctr r7 +seek1loop: +/* r7/r11 = r9/r10 - r12/r6: */ + subfc r11,r6,r10 + subfe. r7,r12,r9 + bge roundup + /* (result(this bit) = 0) */ + addc r10,r10,r10 /* r9/r10 get r9/r10 << 1 */ + adde r9,r9,r9 /* */ + bdnz seek1loop + b full +roundup: /* adding 1 to the sticky bit performs standard */ + addic r31,r31,1 /* rounding; i.e. if sticky=0 then other bits */ + addze r30,r30 /* are not affected */ + +/* We're done! Just assemble the result back into IEEE format: */ +/* r30 = IEEE MSW, r31 = IEEE LSW */ +/* We need to get the msb from bit 10 to bit 11 of result.hi; so shift */ +/* right 1 bit: */ + rlwinm r31,r31,31,1,31 /* result >> 1 */ + rlwimi r31,r30,31,0,0 /* */ + rlwinm r30,r30,31,12,31 /* */ +/* Put the exponent into bits 1-11 of the MSW: */ + rlwimi r30,r8,20,1,11 +/* Calculate sign bit: */ + crxor cr6_sign,cr6_sign,cr7_sign + bf cr6_sign,signbit0 + oris r30,r30,0x8000 /* set sign bit */ +signbit0: + mr r3,r30 /* *FRT = fpa.hi; */ + mr r4,r31 /* *FRT = fpa.lo; */ + RESTREG(30) + RESTREG(31) + mtcr r0 /* restore cr */ + +return_common: + lwz r1,0(r1) /* previous stack frame */ + lwz r0,4(r1) /* saved link register */ + mtlr r0 /* restore link register */ + blr /* return */ + +/*---------------- end of normal-only case --------------- */ +full: + RESTREG(30) + RESTREG(31) + mtcr r0 /* restore CR */ + + + SAVEREG(16) /* save r16 */ + mfcr r16 /* save cr */ +/* load fpa into r8,r9,r10 and cr6. 
load fpb into r11, r12, r6 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa */ + rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb */ + mr r10,r4 /* load fpa.lo */ + cmpwi cr6,r9,0 /* set fpa.sign */ + /* load fpb.lo (already in r6)*/ + cmpwi cr7,r12,0 /* set fpb.sign */ + rlwinm. r9,r9,0,0x000fffff /* isolate fpa.hi */ + SAVEREG(17) /* save r17 */ + cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = fpa.hi == 0 */ + rlwinm. r12,r12,0,0x000fffff /* isolate fpb.hi */ + SAVEREG(18) /* save r18 */ + cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = fpa.hi == 0 */ + cmpwi cr0,r10,0 /* if (fpa.lo == 0) */ + SAVEREG(19) /* save r19 */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero = fpa.hi==0 && fpa.lo==0 */ + cmpwi cr0,r6,0 /* if (fpa.lo == 0) */ + SAVEREG(20) /* save r20 */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero = fpb.hi==0 && fpb.lo==0 */ + cmpwi cr2,r8,0x7ff /* if (fpa.exp == DEXPMAX) */ + SAVEREG(21) /* save r21 */ + SAVEREG(22) /* save r22 */ + cmpwi cr0,r8,0 /* if (fpa.exp == 0) */ + SAVEREG(23) /* save r23 */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */ + SAVEREG(24) /* save r24 */ + crandc cr0_2,cr0_2,cr6_zero /* if (fpa.exp==0 && fpa!=0) */ + cmpwi cr3,r11,0x7ff /* if (fpb.exp == DEXPMAX) */ + beq+ denormal_a /* { Add implied 1 to significand */ + oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */ + b adone /* } else { */ +denormal_a: + addi r8,r8,1 /* fpa.exp++; */ +adone: /* } */ + SAVEREG(25) /* save r25 */ + cmpwi cr0,r11,0 /* if (fpb.exp == 0) */ + SAVEREG(26) /* save r26 */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */ + SAVEREG(27) /* save r27 */ + crandc cr0_2,cr0_2,cr7_zero /* if (fpb.exp==0 && fpb!=0) */ + SAVEREG(28) /* save r28 */ + beq+ denormal_b /* { Add implied 1 to significand */ + oris r12,r12,0x0010 /* fpb.hi |= 0x00100000; */ + b bdone /* } else { */ +denormal_b: + addi r11,r11,1 /* fpb.exp++; */ +bdone: /* 
} */ + + bt cr2_2,a_INForNaN /* if (fpa.exp == 0x7ff) goto a_INForNaN; */ + bt cr3_2,b_INForNaN /* if (fpb.exp == 0x7ff) goto b_INForNaN; */ + bt cr6_zero,a_zero /* if (fpa.zero) goto a_zero; */ + bt cr7_zero,b_zero /* if (fpb.zero) goto b_zero; */ + + + SAVEREG(29) /* save r29 */ +/* force divisor to be normalized */ + cntlzw r7,r12 /* s = cntlz(fpb.hi); */ + cmpwi cr0,r7,11 /* if (s > 11) */ + SAVEREG(30) /* save r30 */ + SAVEREG(31) /* save r31 */ + ble drdone /* { */ + cmpwi cr0,r7,32 /* if (s < 32) */ + addi r7,r7,-11 /* r6 = s-11; */ + subfic r5,r7,32 /* r5 = 43-s; */ + bge bshgt32 /* { */ + subf r11,r7,r11 /* fpb.exp -= (s-11); */ + slw r12,r12,r7 /* fpb.hi <<= s-11; */ + srw r0,r6,r5 /* temp = fpa.lo >> 43-s; */ + or r12,r12,r0 /* fpb.hi |= temp; */ + slw r6,r6,r7 /* fpb.lo <<= s-11; */ + b drdone /* } */ +bshgt32: /* else */ + /* { */ + addi r11,r11,-32 /* fpb.exp -= 32; */ + cntlzw r7,r6 /* s = cntlz(fpb.lo); */ + cmpwi cr0,r7,11 /* if (s >= 11) */ + blt bshgt43 /* { */ + addi r7,r7,-11 /* r6 = s-11; */ + subf r11,r7,r11 /* fpb.exp -= (s-11); */ + slw r12,r6,r7 /* fpb.hi = fpb.lo << s-11; */ + addi r6,0,0 /* fpb.lo = 0; */ + b drdone /* } */ +bshgt43: /* else */ + /* { */ + subfic r7,r7,11 /* r6 = 11-s; */ + add r11,r7,r11 /* fpb.exp += (11-s); */ + srw r12,r6,r7 /* fpb.hi = fpb.lo >> 11-s; */ + subfic r5,r7,32 /* r5 = 21+s; */ + slw r6,r6,r5 /* fpb.lo << (21+s); */ + /* } */ + /* } */ +drdone: /* } */ +/* right justify a 16 bit divisor */ + rlwinm r7,r12,32-5,0x0000ffff /* dr = fpb.hi >> 5; */ +/* Calculate sign */ + crxor cr6_sign,cr6_sign,cr7_sign +/* set up for loop */ + crset first_loop /* first_loop = true; */ + addi r0,0,4 /* r0 = 4; */ + mtctr r0 /* ctr = 4; */ + addi r31,0,0 /* t.save = 0; */ +do_divide: /* for (nest=0;nest<5;nest++) { */ +/* force dividend to be normalized */ + cntlzw r28,r9 /* s = cntlz(fpa.hi); */ + cmpwi cr0,r28,11 /* if (s > 11) */ + ble dddone /* { */ + cmpwi cr0,r28,32 /* if (s < 32) */ + addi r28,r28,-11 /* r28 = s-11; */ 
+ subfic r5,r28,32 /* r5 = 43-s; */ + bge ashgt32 /* { */ + subf r8,r28,r8 /* fpa.exp -= (s-11); */ + slw r9,r9,r28 /* fpa.hi <<= s-11; */ + srw r0,r10,r5 /* temp = fpa.lo >> 43-s; */ + or r9,r9,r0 /* fpa.hi |= temp; */ + slw r10,r10,r28 /* fpa.lo <<= s-11; */ + srw r0,r31,r5 /* temp = r.save >> 43-s; */ + or r10,r10,r0 /* fpa.lo |= temp; */ + b dddone /* } */ +ashgt32: /* else */ + /* { */ + addi r8,r8,-32 /* fpa.exp -= 32; */ + cntlzw r28,r10 /* s = cntlz(fpa.lo); */ + cmpwi cr0,r28,11 /* if (s >= 11) */ + blt ashgt43 /* { */ + addi r28,r28,-11 /* r28 = s-11; */ + subf r8,r28,r8 /* fpa.exp -= (s-11); */ + slw r9,r10,r28 /* fpa.hi = fpa.lo << s-11; */ + subfic r5,r28,32 /* r5 = 43-s; */ + srw r0,r31,r5 /* temp = t.save >> 43-s; */ + or r9,r9,r0 /* fpa.hi |= temp; */ + slw r10,r31,r28 /* fpa.lo = t.save << s-11; */ + b dddone /* } */ +ashgt43: /* else */ + /* { */ + subfic r28,r28,11 /* r28 = 11-s; */ + add r8,r28,r8 /* fpa.exp += (11-s); */ + srw r9,r10,r28 /* fpa.hi = fpa.lo >> 11-s; */ + subfic r5,r28,32 /* r5 = 21+s; */ + slw r10,r10,r5 /* fpa.lo << (21+s); */ + srw r0,r31,r28 /* temp = tsave >> 11-s; */ + or r10,r10,r0 /* fpa.lo |= temp; */ + /* } */ + /* } */ +dddone: /* } */ +/* left justify dividend */ + rlwinm r5,r9,11,0xfffff800 /* dd = fpa.hi << 11; */ + rlwimi r5,r10,11,0x000007ff /* dd |= fpa.lo >> 21; */ +/* estimate quotient (truncate to 16 bits) */ + divwu r27,r5,r7 /* e.hi = dd/dr; */ + cntlzw r28,r27 /* s = cntlz(e.hi); */ + addi r26,r8,1023+15 /* e.exp = fpa.exp + DEXPBIAS+15; */ + subf r26,r11,r26 /* e.exp -= fpb.exp; */ + subf r26,r28,r26 /* e.exp -= s; */ + addi r28,r28,-11 /* r28 = s-11; */ + rlwnm r27,r27,r28,0x001fffe0 /* e.hi = (e.hi << s-11) & 0x001fffe0; */ +eshdone: +/* t (temporary) = b (divisor) * e (estimate) */ +/* Multiply b-low and e */ + mulhwu r24,r6,r27 /* r24,r25 = fpb.lo * e; */ + mullw r25,r6,r27 +/* r29,r30,r31 = r24,r25 >> 20 */ + rlwinm r29,r24,32-20,0xfff /* r29 = r24 >> 20; */ + rlwinm r30,r24,12,0xfffff000 /* r30 = r24 
<< 12; */ + rlwimi r30,r25,32-20,0xfff /* r30 += (r25>>20)&0xfff; */ + rlwinm r31,r25,12,0xfffff000 /* r31 = r25 << 12; */ +/* Multiply b-high and e */ + mulhwu r24,r12,r27 /* r24,r25 = fpb.hi * e; */ + mullw r25,r12,r27 +/* r21,r22 = r24,r25 << 12 */ + rlwinm r21,r24,12,0xfffff000 /* t.hi = r24 << 12; */ + rlwimi r21,r25,32-20,0xfff /* t.hi += ((r25>>20)&0xfff); */ + rlwinm r22,r25,12,0xfffff000 /* t.lo = r25 << 12; */ +/* r21,r22,r31 += r29,r30,r31 */ + addc r22,r22,r30 /* t.lo += r30; */ + adde r21,r21,r29 /* t.hi += r29 + (CA); */ + add r20,r11,r26 /* t.exp = fpb.exp + e.exp; */ + addi r20,r20,-1023 /* t.exp -= DEXPBIAS; */ + +/* r (remainder) = a (dividend) - t (temporary) */ + cmpw cr0,r20,r8 /* if (t.exp < fpa.exp) */ + subf r28,r20,r8 /* shift = fpa.exp - t.exp; */ + subfic r5,r28,32 /* r5 = 32-shift; */ + bge+ tnoshift /* { */ + srw r31,r31,r28 /* t.save >> shift; */ + slw r0,r22,r5 /* temp = t.lo << (32-shift); */ + or r31,r31,r0 /* t.save |= temp; */ + srw r22,r22,r28 /* t.lo >>= shift; */ + slw r0,r21,r5 /* temp = t.hi << (shift-32); */ + or r22,r22,r0 /* t.lo |= temp; */ + srw r21,r21,r28 /* t.hi >>= shift; */ +tnoshift: /* } */ + subfic r31,r31,0 /* t = -t; */ + subfze r22,r22 + subfze r21,r21 +do_rem: + adde r25,r22,r10 /* r.lo = fpa.lo + t.lo; */ + adde. 
r24,r21,r9 /* r.hi = fpa.hi + t.hi; */ + subf r20,r26,r17 /* shift = c.exp - e.exp; */ + bge+ addinq /* if (r.hi >= 0) { */ + addi r27,r27,-32 /* e.hi -= 32; */ + b eshdone +addinq: /* } */ +/* c (quotient) += e (estimate) */ + bt first_loop,first /* if (first_loop) { */ + cmpwi cr0,r20,32 /* if (shift < 32) */ + subfic r21,r20,32 /* r21 = 32-shift; */ + addi r0,0,0 /* round = 0; */ + bge shgt32 /* { */ + slw r28,r27,r21 /* e.lo = e.hi << (32-shift); */ + srw r27,r27,r20 /* e.hi >>= shift; */ + b addit /* } */ +shgt32: /* else */ + /* { */ + cmpwi cr0,r20,64 /* if (shift => 64) */ + addi r20,r20,-32 /* shift -= 32; */ + subfic r21,r20,32 /* r21 = 32-shift; */ + bge setstky /* goto setstky; */ + srw r28,r27,r20 /* e.lo = e.hi >> shift; */ + slw r0,r27,r21 /* round = e.hi << (32-shift); */ + addi r27,0,0 /* e.hi = 0; */ +addit: /* } */ + addc r23,r23,r0 /* c.save += round; */ + adde r19,r19,r28 /* c.lo += e.lo; */ + adde r18,r18,r27 /* c.hi += e.hi; */ + b adddone /* } */ +first: /* else */ + /* { */ + crclr first_loop /* first_loop = false; */ + cntlzw r23,r27 /* r23 = cntlz(e.hi); */ + addi r19,r23,-11 /* r19 = r23 - 11; */ + subf r17,r19,r26 /* c.exp = e.exp - r19; */ + slw r18,r27,r19 /* c.hi = e.hi << r19; */ + addi r19,0,0 /* c.lo = 0; */ + addi r23,0,0 /* c.save = 0; */ +adddone: /* } */ + or r0,r24,r25 /* if (r.hi == 0 && r.lo ==0 && */ + or. 
r0,r0,r31 /* r.save == 0) */ + mr r9,r24 /* a.hi = r.hi; */ + mr r10,r25 /* a.lo = r.lo; */ + beq divdone /* break; */ + bdnz do_divide /* } end for */ +setstky: + ori r23,r23,0x0001 /* sticky bit if non-zero remainder */ +divdone: + +/* put results from r18,r19 into r9 and r10 */ + mr r8,r17 /* fpa.exp = c.exp; */ + mr r9,r18 /* fpa.hi = c.hi; */ + mr r10,r19 /* fpa.lo = c.lo; */ + mr r0,r23 /* round = c.save; */ + + RESTREG(17) /* restore r17 */ + RESTREG(18) /* restore r18 */ + RESTREG(19) /* restore r19 */ + RESTREG(20) /* restore r20 */ + RESTREG(21) /* restore r21 */ + RESTREG(22) /* restore r22 */ + RESTREG(23) /* restore r23 */ + RESTREG(24) /* restore r24 */ + RESTREG(25) /* restore r25 */ + RESTREG(26) /* restore r26 */ + RESTREG(27) /* restore r27 */ + +/* Fix up number in denormalized range */ + cmpwi cr0,r8,1 /* if (fpa.exp < 1) */ + RESTREG(28) /* restore r28 */ + RESTREG(29) /* restore r29 */ + addis r5,0,0x8000 /* r5 = 0x80000000 */ + bge+ normexp /* { */ + subfic r12,r8,1 /* shift = 1-fpa.exp; */ + li r8,0 /* fpa.exp = 0; */ + cmpwi cr0,r12,32 /* if ( shift < 32) */ + cmpwi cr1,r0,0 /* sticky = (round!=0); */ + bge lose32 /* { */ + srw r0,r0,r12 /* round >>= shift; */ + subfic r11,r12,32 /* r11 = 32-shift; */ + slw r7,r10,r11 /* temp = fpa.lo << (32-shift); */ + or r0,r0,r7 /* round |= temp; */ + srw r10,r10,r12 /* fpa.lo >>= shift; */ + slw r7,r9,r11 /* temp = fpa.hi << (32-shift); */ + or r10,r10,r7 /* fpa.lo |= temp; */ + srw r9,r9,r12 /* fpa.hi >>= shift; */ + beq cr1,normexp /* if (sticky) */ + ori r0,r0,0x0001 /* round |= 0x00000001; */ + b normexp /* } */ +lose32: /* else */ + /* { */ + addi r12,r12,-32 /* shift -= 32; */ + cmpwi cr0,r12,32 /* if (shift < 32) */ + bge lose64 /* { */ + or. 
r7,r0,r10 /* sticky = (round!=0||fpa.lo!=0); */ + srw r0,r10,r12 /* round = fpa.lo >> shift; */ + subfic r11,r12,32 /* r11 = 32-shift; */ + slw r7,r9,r11 /* temp = fpa.hi << (shift-32); */ + or r0,r0,r7 /* round |= temp; */ + srw r10,r9,r12 /* fpa.lo = fpa.hi >> shift; */ + addi r9,0,0 /* fpa.hi = 0; */ + beq normexp /* if (sticky) */ + ori r0,r0,0x0001 /* round |= 0x00000001; */ + b normexp /* } */ +lose64: /* else */ + /* { */ + addi r9,0,0 /* fpa.hi = 0; */ + addi r10,0,0 /* fpa.lo = 0; */ + addi r0,0,0 /* round = 0; */ + /* } */ + /* } */ +normexp: /* } */ + cmpwi cr0,r8,2047 /* if (fpa.exp >= DEXPMAX) */ + RESTREG(30) /* restore r30 */ + RESTREG(31) /* restore r31 */ + blt+ roundit /* { Set fpa = INF */ + addi r8,0,2047 /* fpa.exp = DEXPMAX; */ + addi r9,0,0 /* fpa.hi = 0; */ + addi r10,0,0 /* fpa.lo = 0; */ + b noround /* goto noround; */ +roundit: /* } */ +/* round fpa to nearest */ + cmplw cr7,r0,r5 /* if ((round >= 0x80000000) || */ + blt cr7,noround /* */ + andi. r7,r10,0x0001 /* (((fpa.lo & 0x00000001) && */ + crand cr0_2,cr7_2,cr0_2 /* (round == 0x80000000))) */ + beq noround /* { */ + addic. 
r10,r10,1 /* fpa.lo++; */ + bne noround /* if (fpa.lo == 0) { */ + addi r9,r9,1 /* fpa.hi++; */ + lis r7,0x10 /* r7 = 0x00200000; */ + cmpw cr0,r9,r7 /* if (fpa.hi == 0x00200000) */ + bne noround /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ + /* } */ +noround: /* } */ +/* fpt = fpa; */ + rlwimi r9,r8,20,0x7ff00000 /* fpa.hi |= fpa.exp << 20; */ + bf cr6_sign,nosign /* if (fpa.sign) { */ + oris r9,r9,0x8000 /* fpa.hi |= 0x80000000; */ +nosign: /* } */ + mr r3,r9 /* FRT = fpa.hi; */ + mr r4,r10 /* FRT = fpa.lo; */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + +a_zero: /* fpa == 0; */ + bt cr7_zero,rtn_NaN /* if (fpb.zero) goto rtn_NaN; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + /* FRT = r3,r4; */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ +rtn_NaN: + oris r3,r3,0x7ff8 /* fpa = QNaN; */ + rlwinm r3,r3,0,0x7fffffff /* r3 &= 0x7fffffff; */ + /* FRT = r3,r4; */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + +b_zero: /* return INF; */ + lis r3,0x7ff0 /* r3 = 0x7ff00000; */ + li r4,0 /* r4 = 0; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + + +a_INForNaN: +/* fpa is either INF or NaN */ + rlwinm r0,r3,0,0x000fffff + or. r0,r0,r4 /* if (fpa.hi == 0 && fpa.lo == 0) */ + beq a_INF /* goto a_INF; */ +a_NaN: /* return QNaN; */ + oris r3,r3,0x0008 /* FRA->hi |= 0x80000; */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + + +b_INForNaN: +/* fpb is either INF or NaN */ + rlwinm r0,r5,0,0x000fffff + or. 
r0,r0,r6 /* if (fpb.hi == 0 && fpb.lo == 0) */ + beq b_INF /* goto b_INF; */ +b_NaN: /* return QNaN; */ + oris r3,r5,0x0008 /* FRB->hi |= 0x80000; */ + mr r4,r6 /* FRT = r3,r4; */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + +a_INF: /* return INF; */ + lis r3,0x7ff0 /* r3 = 0x7ff00000; */ + li r4,0 /* r4 = 0; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + bf cr3_2,nNaN /* if (fpb.inf) goto rtn_NaN; */ + rlwinm r0,r5,0,0x000fffff + or. r0,r6,r0 + bne b_NaN + b rtn_NaN /* if (fpb.inf) goto rtn_NaN; */ +nNaN: mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + /* FRT = r3,r4; */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + +b_INF: /* return 0.0; */ + li r3,0 /* r3 = 0; */ + li r4,0 /* r4 = 0; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + mtcr r16 /* restore cr */ + RESTREG(16) /* restore r16 */ + b return_common /* return; */ + +function_epilog(__divdf3) + +#endif + +#ifdef L__divsf3 + +/* fpopt/ppc_fdivs.S, pl_FPE_common, pl_linux 11/24/03 16:17:33 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: divide two single floating point values. frt = fpa / fpb */ +/* Input: r3(fpa), r4(fpb) */ +/* Output: r3(frt) */ +/* Notes: 1. No stack frame is created for the normal case of this function, */ +/* so the following registers must be preserved, as required by */ +/* ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. For the full function case, a stack frame is created, following */ +/* ABI specification, and non-volatile registers are saved and */ +/* restored on the stack. */ +/* 3. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +#define first_loop 4 + +function_prolog(__divsf3) + + mfcr r0 /* save cr */ + +/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r4 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,9,0xff /* isolate exponent of fpa into r8 */ + rlwinm r11,r12,9,0xff /* isolate exponent of fpb into r11 */ + cmpwi cr6,r9,0 /* set fpa.sign */ + cmpwi cr7,r12,0 /* set fpb.sign */ +/* test for normal: */ + cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */ + cmpwi cr1,r8,0 /* if (fpa.exp == 0) */ + cmpwi cr2,r11,0xff /* if (fpb.exp == SEXPMAX) */ + cmpwi cr3,r11,0 /* if (fpb.exp == 0) */ + beq full + beq cr1,full + beq cr2,full + beq cr3,full + +/* normal case only: */ + oris r9,r9,0x0080 /* fpa.hi |= 0x00800000; */ + oris r12,r12,0x0080 /* fpb.hi |= 0x00800000; */ + +/* Calculate exponent */ + subf r8,r11,r8 /* result.exp = fpa.exp - fpb.exp; */ + /* ------- note: r11 is now free */ + addic. r8,r8,+127 /* result.exp += SEXPBIAS; check for negative */ + /* result.exp += 1 (for dividing left-just */ + /* numbers if result is left-just) */ + cmpwi cr2,r8,0xff /* if (result.exp) >= SEXPMAX */ + ble full /* or less than minimum */ + bge cr2,full + +/* Zero out the non-number part of the operands: */ +/* for the sign bit plus one working bit: */ + rlwinm r9,r9,0,8,31 /* r9 = fpa */ + rlwinm r12,r12,0,8,31 /* r12 = fpb */ + +/* Divide the numbers (25 bits of long division); */ +/* r5 holds the intermediate differences. */ + li r5,25 /* calculate 24 signif + 1 sticky */ + mtctr r5 + +longdivloop: +/* r5 = r9 - r12: */ + subfc. 
r5,r12,r9 + adde r11,r11,r11 /* rot result left and put CA into LSb of result */ + blt+ zerobit + /* if intermed diff positive then: */ + adde r9,r5,r5 /* r9 gets r5 << 1 */ + bdnz longdivloop + b done25 + +zerobit: /* else: */ + adde r9,r9,r9 /* r9 gets r9 << 1 */ + bdnz longdivloop +done25: + +/* Since we divided one left-justified number by another left-justified */ +/* number, there are two possibilities for the result: */ +/* either (the msb is 1) or */ +/* (the msb is 0 and the next bit is 1). If the msb is 0, then we need to */ +/* calculate one more bit (to make 25 significant bits). */ +/* NOTE: the msb is now in bit 7 (=32-25) of result=r11, because we have */ +/* calculated 25 bits. */ + rlwinm. r5,r11,0,7,7 /* test hi-bit = bit 7 of result */ + bne+ highbit_is_1 + addic. r8,r8,-1 /* shifted case: exponent is 1 less than */ + /* non-shifted. */ + beq full /* exponent=0 is special case */ + addi r5,0,1 + mtctr r5 + b longdivloop +highbit_is_1: + +/* We now have the first 25 bits of the result. Bits beyond 24 are only */ +/* needed to determine rounding. If bit 25 (the last one just calculated) */ +/* is 0, then we don't round. If it is 1, then we'll calculate up to 12 more */ +/* bits, looking for a 1. The first time we see a 1, we know to round up. */ +/* But even if all 12 are 0, we still don't know for sure to not round, */ +/* so we punt to the full routine. */ + rlwinm r5,r11,0,30,31 + cmpwi r5,1 + bne roundup + +/* At this point the LSb is 0 and the sticky bit is 1; we need more bits */ +/* to determine rounding -- if any subsequent bit is 1, then we round up. */ +/* We'll try 12 more bits before giving up: */ + addi r5,0,12 + mtctr r5 +seek1loop: +/* r5 = r9 - r12: */ + subfc. r5,r12,r9 + bge roundup + /* (result(this bit) = 0) */ + adde r9,r9,r9 /* r9 gets r9 << 1 */ + bdnz seek1loop + b full + +roundup: /* adding 1 to the sticky bit performs standard */ + addi r11,r11,1 /* rounding; i.e. if sticky=0 then other bits */ + +/* We're done! 
Just assemble the result back into IEEE format: */ +/* We need to get the msb from bit 7 to bit 8 of the result, so shift 1 */ + srwi r3,r11,1 + +/* Put the exponent into bits 1-8 of the MSW: */ + rlwimi r3,r8,23,1,8 + +/* Calculate sign bit: */ + crxor cr6_sign,cr6_sign,cr7_sign + bf cr6_sign,signbit0 + oris r3,r3,0x8000 /* set sign bit */ + +signbit0: + mtcr r0 /* restore cr */ + blr /* return */ + +/*---------------- end of normal-only case --------------- */ +full: + mtcr r0 /* restore CR */ + + mflr r0 /* save link register in caller's stack */ + stw r0,4(r1) + stwu r1,-STACKSIZE(r1) /* set up stack frame to save regs */ + + SAVEREG(19) /* save r19 */ + mfcr r19 /* save cr */ + +/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r4 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r4 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,9,0xff /* isolate exponent of fpa */ + rlwinm r11,r12,9,0xff /* isolate exponent of fpb */ + cmpwi cr6,r9,0 /* set fpa.sign */ + cmpwi cr7,r12,0 /* set fpb.sign */ + rlwinm. r9,r9,0,0x007fffff /* isolate fpa.hi */ + cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = fpa.hi == 0 */ + rlwinm. 
r12,r12,0,0x007fffff /* isolate fpb.hi */ + cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = fpa.hi == 0 */ + SAVEREG(20) /* save r20 */ + cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */ + SAVEREG(21) /* save r21 */ + crand cr6_inf,cr6_zero,cr0_2 /* fpa.inf=(fpa.exp==SEXPMAX && fpa==0) */ + SAVEREG(22) /* save r22 */ + crandc cr6_NaN,cr0_2,cr6_zero /* fpa.NaN=(fpa.exp==SEXPMAX && fpa!=0) */ + cmpwi cr0,r8,0 /* if (fpa.exp == 0) */ + SAVEREG(23) /* save r23 */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */ + SAVEREG(24) /* save r24 */ + crandc cr0_2,cr0_2,cr6_zero /* if (fpa.exp==0 && fpa!=0) */ + cmpwi cr1,r11,0xff /* if (fpb.exp == SEXPMAX) */ + beq denormal_a /* { Add implied 1 to significand */ + oris r9,r9,0x0080 /* fpa.hi |= 0x00800000; */ + b adone /* } else { */ +denormal_a: + addi r8,r8,1 /* fpa.exp++; */ +adone: /* } */ + crand cr7_inf,cr7_zero,cr1_2 /* fpb.inf=(fpb.exp==SEXPMAX && fpb==0) */ + SAVEREG(25) /* save r25 */ + crandc cr7_NaN,cr1_2,cr7_zero /* fpb.NaN=(fpb.exp==SEXPMAX && fpb!=0) */ + cmpwi cr0,r11,0 /* if (fpb.exp == 0) */ + SAVEREG(26) /* save r26 */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */ + SAVEREG(27) /* save r27 */ + crandc cr0_2,cr0_2,cr7_zero /* if (fpb.exp==0 && fpb!=0) */ + beq denormal_b /* { Add implied 1 to significand */ + oris r12,r12,0x0080 /* fpb.hi |= 0x00800000; */ + b bdone /* } else { */ +denormal_b: + addi r11,r11,1 /* fpb.exp++; */ +bdone: /* } */ + + bt cr6_NaN,a_NaN /* if (fpa.NaN) goto a_NaN; */ + bt cr7_NaN,b_NaN /* if (fpb.NaN) goto b_NaN; */ + bt cr6_zero,a_zero /* if (fpa.zero) goto a_zero; */ + bt cr6_inf,a_INF /* if (fpa.inf) goto a_INF; */ + bt cr7_zero,b_zero /* if (fpb.zero) goto b_zero; */ + bt cr7_inf,b_INF /* if (fpb.inf) goto b_INF; */ + + li r10,0 /* zero fpa.lo for double precision divide */ + +/* left justify divisor (really only needed for denormal numbers) */ + cntlzw r6,r12 /* s = cntlz(fpb.hi); */ + addi r6,r6,-8 /* r6 = s-8; */ + subf r11,r6,r11 /* fpb.exp 
-= (s-8); */ + slw r12,r12,r6 /* fpb.hi <<= s-8; */ + +/* right justify a 16 bit divisor */ + rlwinm r6,r12,32-8,0x0000ffff /* dr = fpb.hi >> 8; */ + +/* Calculate sign */ + crxor cr6_sign,cr6_sign,cr7_sign + +/* set up for loop */ + crset first_loop /* first_loop = true; */ + li r0,3 /* r0 = 3; */ + mtctr r0 /* ctr = 3; */ + +do_divide: /* for (nest=0;nest<3;nest++) { */ +/* force dividend into one register */ + cntlzw r4,r9 /* s = cntlz(fpa.hi); */ + cmpwi cr0,r4,8 /* if (s > 8) */ + ble dddone /* { */ + cmpwi cr0,r4,32 /* if (s < 32) */ + addi r4,r4,-8 /* r4 = s-8; */ + subfic r5,r4,32 /* r5 = 40-s; */ + bge ashgt32 /* { */ + subf r8,r4,r8 /* fpa.exp -= (s-8); */ + slw r9,r9,r4 /* fpa.hi <<= s-8; */ + srw r0,r10,r5 /* temp = fpa.lo >> 40-s; */ + or r9,r9,r0 /* fpa.hi |= temp; */ + b dddone /* } */ +ashgt32: /* else */ + /* { */ + addi r8,r8,-32 /* fpa.exp -= 32; */ + cntlzw r4,r10 /* s = cntlz(fpa.lo); */ + cmpwi cr0,r4,8 /* if (s >= 8) */ + blt ashgt43 /* { */ + addi r4,r4,-8 /* r4 = s-8; */ + subf r8,r4,r8 /* fpa.exp -= (s-8); */ + slw r9,r10,r4 /* fpa.hi = fpa.lo << s-8; */ + b dddone /* } */ +ashgt43: /* else */ + /* { */ + subfic r4,r4,8 /* r4 = 8-s; */ + add r8,r4,r8 /* fpa.exp += (8-s); */ + srw r9,r10,r4 /* fpa.hi = fpa.lo >> 8-s; */ + /* } */ + /* } */ +dddone: /* } */ +/* left justify dividend */ + rlwinm r5,r9,8,0xffffff00 /* dd = fpa.hi << 8; */ +/* estimate quotient (truncate to 16 bits) */ + divwu r27,r5,r6 /* e.hi = dd/dr; */ + cntlzw r4,r27 /* s = cntlz(e.hi); */ + addi r26,r8,127+15 /* e.exp = fpa.exp + SEXPBIAS+15 */ + subf r26,r11,r26 /* e.exp -= fpb.exp; */ + subf r26,r4,r26 /* e.exp -= s; */ + addi r4,r4,-8 /* r4 = s-8; */ + rlwnm r27,r27,r4,0x00ffff00 /* e.hi = (e.hi << s-8) & 0x00ffff00; */ +eshdone: +/* t (temporary) = b (divisor) * e (estimate) */ +/* Multiply b and e */ + mulhwu r21,r12,r27 /* t.hi,t.lo = fpb.hi * e; */ + mullw r22,r12,r27 + rlwinm r21,r21,9,0xfffffe00 /* t.hi <<= 9; */ + rlwimi r21,r22,32-23,0x1ff /* t.hi += 
((t.lo>>23)&0x1ff); */ + rlwinm r22,r22,9,0xfffffe00 /* t.lo <<= 9; */ + add r20,r11,r26 /* t.exp = fpb.exp + e.exp; */ + addi r20,r20,-127 /* t.exp -= (SEXPBIAS); */ + +/* r (remainder) = a (dividend) - t (temporary) */ + cmpw cr0,r20,r8 /* if (t.exp < fpa.exp) */ + subf r4,r20,r8 /* shift = fpa.exp - t.exp; */ + subfic r5,r4,32 /* r5 = 32-shift; */ + bge+ tnoshift /* { */ + srw r22,r22,r4 /* t.lo >>= shift; */ + slw r0,r21,r5 /* temp = t.hi << (shift-32); */ + or r22,r22,r0 /* t.lo |= temp; */ + srw r21,r21,r4 /* t.hi >>= shift; */ +tnoshift: + subfic r22,r22,0 /* t = -t; */ + subfze r21,r21 + add. r21,r21,r9 /* r.hi = fpa.hi + t.hi; */ + bge addinq /* if (r.hi >= 0) goto addinq; */ + +/* remainder is neg; estimate too high. decrement and retry. */ + addi r27,r27,-256 /* e.hi -= 0x100; */ + b eshdone /* goto eshdone; */ + +addinq: +/* c (quotient) += e (estimate) */ + bf+ first_loop,notfirst /* if (first_loop) { */ + crclr first_loop /* first_loop = false; */ + cntlzw r23,r27 /* r23 = cntlz(e.hi); */ + addi r25,r23,-8 /* r25 = r23 - 8; */ + subf r23,r25,r26 /* c.exp = e.exp - r25; */ + slw r24,r27,r25 /* c.hi = e.hi << r25; */ + li r25,0 /* c.lo = 0; */ + b adddone /* } */ +notfirst: /* else */ + /* { */ + subf r20,r26,r23 /* shift = c.exp - e.exp; */ + cmpwi cr0,r20,32 /* if (shift < 32) */ + subfic r5,r20,32 /* r5 = 32-shift; */ + bge shgt32 /* { */ + slw r4,r27,r5 /* e.lo = e.hi << (32-shift); */ + srw r27,r27,r20 /* e.hi >>= shift; */ + b addit /* } */ +shgt32: /* else */ + /* { */ + addi r20,r20,-32 /* shift -= 32; */ + srw r4,r27,r20 /* e.lo = e.hi >> shift; */ + li r27,0 /* e.hi = 0; */ +addit: /* } */ + add r25,r25,r4 /* c.lo += e.lo; */ + add r24,r24,r27 /* c.hi += e.hi; */ +adddone: /* } */ + or. 
r0,r21,r22 /* if (r.hi == 0 && r.lo ==0) */ + mr r9,r21 /* a.hi = r.hi; */ + mr r10,r22 /* a.lo = r.lo; */ + beq divdone /* break; */ + bdnz do_divide /* } end for */ +divdone: + +/* put results from r23,r24,r25 into r8,r9 and r10 */ + mr r8,r23 /* fpa.exp = c.exp; */ + mr r9,r24 /* fpa.hi = c.hi; */ + mr r10,r25 /* fpa.lo = c.lo; */ + + RESTREG(20) /* restore r20 */ + RESTREG(21) /* restore r21 */ + RESTREG(22) /* restore r22 */ + RESTREG(23) /* restore r23 */ + +/* Normalize results */ + cntlzw r5,r9 /* s = cntlz(fpa.hi); */ + cmpwi cr0,r5,8 /* if (s < 8) */ + RESTREG(24) /* restore r24 */ + RESTREG(25) /* restore r25 */ + bge noshrght /* { */ + subfic r5,r5,8 /* r5 = 8-s; */ + subfic r11,r5,32 /* r11 = 21+s; */ + srw r10,r10,r5 /* fpa.lo >>= (8-s); */ + slw r6,r9,r11 /* temp = fpa.hi << (24+s); */ + or r10,r10,r6 /* fpa.lo |= temp; */ + srw r9,r9,r5 /* fpa.hi >>= (8-s); */ + add r8,r8,r5 /* fpa.exp += (8-s); */ + cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */ + bne noshift /* { */ + li r9,0 /* fpa.hi = 0; */ + li r10,0 /* fpa.lo = 0; */ + /* } */ + b noshift /* } */ +noshrght: /* else if (s > 8) */ + beq noshift /* { */ + cmpwi cr0,r5,32 /* if (s < 32) */ + bge gt32 /* { */ + addi r5,r5,-8 /* r5 = s-8; */ + subf r8,r5,r8 /* fpa.exp -= (s-8); */ + slw r9,r9,r5 /* fpa.hi <<= (s-8); */ + subfic r11,r5,32 /* r11 = 43-s; */ + srw r4,r10,r11 /* temp == fpa.lo >> (46-s); */ + or r9,r9,r4 /* fpa.hi |= temp; */ + slw r10,r10,r5 /* fpa.lo <<= (s-8); */ + b noshift /* } */ +gt32: /* else */ + /* { */ + cntlzw r5,r10 /* s = cntlz(fpa.lo); */ + addi r4,r5,32-8 /* r4 = s+(32-8); */ + subf r8,r4,r8 /* fpa.exp -= (s+(32-8)); */ + addic. 
r5,r5,-8 /* r5 = s-8; */ + ble sh32le /* if (shift > 8) { */ + slw r9,r10,r5 /* fpa.hi = (fpa.lo << (s-8)); */ + b noshift /* } else { */ +sh32le: + subfic r5,r5,0 /* r5 = 8-s; */ + srw r9,r10,r5 /* fpa.hi = fpa.lo >> (8-s); */ + subfic r11,r5,32 /* r11 = 32-(8-s); */ + slw r10,r10,r11 /* fpa.lo <<= (32-(11-s)); */ + /* } */ + /* } */ +noshift: /* } */ +/* Check for single overflow */ + cmpwi cr0,r8,0xff /* if (fpa.exp > (SEXPMAX)) */ + blt over /* { result = infinity */ + li r8,0xff /* fpa.exp = SEXPMAX; */ + li r9,0 /* fpa.hi = 0; */ + li r10,0 /* fpa.lo = 0; */ + RESTREG(26) /* restore r26 */ + RESTREG(27) /* restore r27 */ + b noround /* } */ + +/* Fix up number in normalized range */ +over: + cmpwi cr0,r8,1 /* if (fpa.exp >= 1 */ + RESTREG(26) /* restore r26 */ + RESTREG(27) /* restore r27 */ + blt denormexp /* { */ + lis r12,0x8000 /* r12 = guard bit; */ + cmplw cr1,r10,r12 /* if (fpa.lo >= 0x80000000) guard set */ + blt cr1,noround /* { */ + andi. r0,r9,0x00000001 /* if ((fpa.hi.lobit==0) && */ + crand cr0_2,cr0_2,cr1_2 /* (guard == 0x80000000)) no sticky or round */ + beq noround /* goto noround; */ + addi r9,r9,1 /* fpa.hi++; */ + lis r6,0x0100 /* r6 = 0x01000000; */ + cmpw cr0,r9,r6 /* if (fpa.hi == 0x01000000) carry out */ + bne noround /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ + /* } */ + /* } */ + b noround /* } */ +denormexp: /* else */ + /* { */ + subfic r12,r8,1 /* shift=1-fpa.exp; */ + li r8,0 /* fpa.exp = 0 for denormalized result */ + cmpwi cr0,r12,25 /* if ( shift < 25) */ + bge rnd2zero /* { */ + subfic r11,r12,32 /* r11 = 32-shift; */ + slw. 
r0,r10,r11 /* round = fpa.lo << (32-shift); */ + srw r5,r10,r12 /* r5 = fpa.lo >> shift; */ + slw r6,r9,r11 /* temp = fpa.hi << (32-shift); */ + or r5,r5,r6 /* r5 |= temp; */ + srw r4,r9,r12 /* r4 = fpa.hi >> shift; */ + beq nostk /* if (round) */ + ori r5,r5,0x0001 /* r5 |= 0x00000001; */ +nostk: + lis r6,0x8000 /* r6 = 0x80000000; */ + cmplw cr1,r5,r6 /* if (guard bit set) */ + mr r9,r4 /* move back into results register */ + blt cr1,noround /* { */ + andi. r0,r9,0x00000001 /* if ((fpa.hi.lobit==0) && */ + crand cr0_2,cr0_2,cr1_2 /* (fpa.lo= 0x80000000)) no sticky or round */ + beq noround /* goto noround; */ + addi r9,r9,1 /* fpa.hi++; (round up) */ + /* } */ + lis r6,0x0080 /* r6 = 0x00800000; */ + cmpw cr0,r9,r6 /* if (fpa.hi == 0x00800000) carry out */ + bne noround /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ + b noround /* } */ +rnd2zero: /* else */ + /* { */ + li r8,0 /* fpa.exp = 0; */ + li r9,0 /* fpa.hi = 0; */ + li r10,0 /* fpa.lo = 0; */ + /* } */ +noround: /* } */ +/* fpt = fpa; */ + rlwimi r9,r8,23,0x7f800000 /* fpa.hi |= fpa.exp << 23; */ + bf cr6_sign,nosign /* if (fpa.sign) { */ + oris r9,r9,0x8000 /* fpa.hi |= 0x80000000; */ +nosign: /* } */ + mr r3,r9 /* *FRT = fpa.hi; */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + +common_return: + lwz r1,0(r1) /* previous stack frame */ + lwz r0,4(r1) /* saved link register */ + mtlr r0 /* restore link register */ + blr + +a_zero: /* fpa == 0; */ + li r3,0 /* result= +-0 ; */ + bt cr7_zero,ab_zero /* if (fpb.zero) goto ab_zero; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert sign into result */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +ab_zero: /* need to return special QNaN */ + nand r3,r3,r3 /* flip all the bits */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +b_zero: /* return INF; */ + lis 
r3,0x7f80 + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +a_NaN: /* return QNaN; */ + oris r3,r3,0x0040 /* FRA->hi |= 0x400000; */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +b_NaN: /* return QNaN; */ + mr r3,r4 + oris r3,r3,0x0040 /* FRB->hi |= 0x80000; */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +a_INF: /* return INF; */ + lis r3,0x7f80 /* load INF into results register */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + bt cr7_inf,rtn_NaN /* if (fpb.inf) goto rtn_NaN; */ + rlwimi r3,r0,0,0x80000000 /* insert sign into result */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +rtn_NaN: + lis r3,0x7fc0 /* fpa = QNaN; */ + rlwimi r3,r0,0,0x80000000 /* insert sign into result */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +b_INF: /* return 0.0; */ + li r3,0 + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert sign into result */ + mtcr r19 /* restore cr */ + RESTREG(19) /* restore r19 */ + b common_return /* return; */ + +function_epilog(__divsf3) + +#endif + +#ifdef L__muldf3 + +/* fpopt/ppc_fmul.S, pl_FPE_common, pl_linux 11/24/03 16:17:34 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. 
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: multiply two double floating point values. frt = fpa * fpb */ +/* Input: r3,r4(fpa) */ +/* r5,r6(fpb) */ +/* Output: r3,r4(frt) */ +/* Notes: 1. 
No stack frame is created for the normal case of this function, */ +/* so the following registers must be preserved, as required by */ +/* ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. For the full function case, a stack frame is created, following */ +/* ABI specification, and non-volatile registers are saved and */ +/* restored on the stack. */ +/* 3. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__muldf3) + + mfcr r0 /* save cr */ + mtctr r13 /* save r13 */ + +/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa into r8 */ + rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb into r11 */ + mr r10,r4 /* load fpa.lo */ + /* load fpb.lo (already in r6) */ + +/* test for normal: */ + cmpwi cr0,r8,0x7ff /* if (fpa.exp == DEXPMAX) */ + cmpwi cr1,r8,0 /* if (fpa.exp == 0) */ + cmpwi cr2,r11,0x7ff /* if (fpb.exp == DEXPMAX) */ + cmpwi cr3,r11,0 /* if (fpb.exp == 0) */ + beq full /*fpa.exp is max */ + beq cr2,full /* fpb.exp is max */ + beq cr1,tst0a /* fpa.exp is 0 */ + beq cr3,tst0b /* fpb.exp is 0 */ + +/* normal case only: */ + oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */ + oris r12,r12,0x0010 /* fpb.hi |= 0x00100000; */ + +/* Calculate exponent */ + add r8,r8,r11 /* result.expt = fpa.exp + fpb.exp; */ + /* ------- note: r11 is now free */ + addic. 
r8,r8,-1022 /* result.exp -= DEXPBIAS; check for negative */ + /* result.exp += 1 (for multiplying left-just */ + /* numbers if result is left-just) */ + cmpwi cr2,r8,0x7fd /* if (result.exp >= DEXPMAX-2) */ + ble full + bge cr2,full + +/* Multiply the numbers: */ +/* Start by shifting everything left 11 bits, so that the MSW is full: */ + rlwinm r9,r9,11,0,20 /* r9 = fpa.hi */ + rlwimi r9,r10,11,21,31 + rlwinm r10,r10,11,0,20 /* r10 = fpa.lo */ + + rlwinm r12,r12,11,0,20 /* r12 = fpb.hi */ + rlwimi r12,r6,11,21,31 + rlwinm r13,r6,11,0,20 /* r13 = fpb.lo */ + +/* Multiply a-high and b-high: */ + mulhwu r7,r9,r12 /* r7 gets high word of result */ + mullw r11,r9,r12 /* r11 gets low word of result */ + +/* Multiply a-high and b-low (upper half only): */ + mulhwu r13,r9,r13 + addc r13,r11,r13 + addze r7,r7 + +/* Multiply a-low and b-high (upper half only): */ + mulhwu r10,r10,r12 + addc r13,r13,r10 + addze. r7,r7 + /* r7/r13 now have result hi/lo */ + /* ------- note: r9-r12 are now free */ +/* There are two possibilities for the result: */ +/* Either (bit 0 is 1) or (bit 0 is 0 and bit 1 is 1). */ +/* If bit 0 is 0, then shift left 1 bit to normalize: */ + blt msb_is_bit0 + addc r13,r13,r13 /* shift r7/r13 left 1 bit */ + adde r7,r7,r7 /* */ + addic. r8,r8,-1 /* shifted case: exponent is 1 less */ + /* than non-shifted */ + beq full /* if exponent out of range, go to full handler */ +msb_is_bit0: + +/* See if we need to round. (NOTE: the exact .5 case needs more precision */ +/* than what we have calculated; in that case, go to the full handler.) */ +/* 53 bits (bits 0-52 of the 2-word result) are significant. The sticky bit */ +/* is bit 53 = bit (53-32= 21) of the low word (r13). If it is 0, then no */ +/* rounding. */ +/* BUT we don't really know the sticky bit for sure yet -- the uncalculated */ +/* low-order 64 bits of the 128-bit product could conceivably cause the low */ +/* bits of our LSW to round up, changing the state of bit 21. 
*/ +/* The round-up is the result of a carry-out from 3 neglected summands, */ +/* so at most it adds 2 to the low word. But we have already */ +/* shifted left one bit, just above, so by now the carry-out could add */ +/* as much as 4. The addition of a 3-bit number can */ +/* propagate up to bit 21 ONLY if bits 22-29 are all ones: */ + rlwinm. r10,r13,0,22,31 + cmpwi cr2,r10,0x3fc /* special case if bits 22-29 all 1's */ + rlwinm r11,r13,0,20,21 /* test LSb and sticky bit */ + bge cr2,full + cmpwi cr3,r11,0x400 /* special rounding if LSb=0 and sticky=1 */ + addic r13,r13,0x400 /* standard rounding = add 1 to sticky */ + bne+ cr3,finish_std_round + beq full /* if LSb=0 and sticky=1 and 22-31 all */ + /* zeros, then special case */ +finish_std_round: + addze. r7,r7 /* add carry-out of low word to high */ + /* -- if this overflowed us, then we */ + /* need to shift back right 1 bit */ + /* and increment the exponent */ + /* to reflect this shift: */ + bne+ std_done + addi r8,r8,1 + rlwinm r13,r13,32-1,1,31 + rlwimi r13,r7,32-1,0,0 + rlwinm r7,r7,32-1,1,31 + +std_done: + +/* We're done! Just assemble the result back into IEEE format: */ +/* r3 = IEEE MSW, r4 = IEEE LSW */ + + xor r9,r3,r5 /* get product sign */ + +/* Shift right 11 bits (= left 21 bits) to put the MSb into bit 11, but */ +/* mask it off since it's implied: */ + rlwinm r3,r7,32-11,12,31 + +/* Bits 0-10 of the IEEE LSW represent bits 21-31 of the result, so they */ +/* come from bits 21-31 of the high word (r7): */ + rlwinm r4,r7,21,0,10 + +/* Bits 11-31 of the IEEE LSW represent bits 32-52 of the result, so they */ +/* come from bits 0-20 of the low word (r13): */ + rlwimi r4,r13,32-11,11,31 + +/* Put the exponent into bits 1-11 of the MSW: */ + rlwimi r3,r8,20,1,11 + +/* Insert sign bit: */ + rlwimi r3,r9,0,0,0 + + mtcr r0 /* restore cr */ + mfctr r13 /* restore r13 */ + blr /* return; */ + +tst0a: + rlwinm. 
r11,r9,0,1,31 + cmpwi cr2,r10,0 + bne full + bne cr2,full + +/* a is 0: */ +/* Calculate sign: */ +return0: + xor r3,r9,r12 /* get product sign */ + rlwinm r3,r3,0,0,0 + li r4,0 + mtcr r0 /* restore cr */ + mfctr r13 /* restore r13 */ + blr /* return */ + +tst0b: + rlwinm. r11,r12,0,1,31 + cmpwi cr2,r6,0 + bne full + bne cr2,full +/* b is 0: */ + b return0 + +/*---------------- end of normal-only case --------------- */ +full: + mtcr r0 /* restore CR */ + + mflr r0 /* save link register in caller's stack */ + stw r0,4(r1) + stwu r1,-STACKSIZE(r1) /* set up stack frame to hold saved regs */ + + mfctr r13 /* restore r13 */ + + SAVEREG(28) /* save r28 */ + mfcr r28 /* save cr */ + mtctr r13 /* save r13 in CTR */ + +/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa */ + rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb */ + mr r10,r4 /* load fpa.lo */ + cmpwi cr6,r9,0 /* set fpa.sign */ + mr r13,r6 /* load fpb.lo */ + cmpwi cr7,r12,0 /* set fpb.sign */ + rlwinm. r9,r9,0,0x000fffff /* isolate fpa.hi */ + cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = fpa.hi == 0 */ + rlwinm. 
r12,r12,0,0x000fffff /* isolate fpb.hi */ + SAVEREG(29) /* save r29 */ + cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = fpa.hi == 0 */ + cmpwi cr0,r10,0 /* if (fpa.lo == 0) */ + SAVEREG(30) /* save r30 */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero = fpa.hi==0 && fpa.lo==0 */ + cmpwi cr0,r13,0 /* if (fpa.lo == 0) */ + SAVEREG(31) /* save r31 */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero = fpb.hi==0 && fpb.lo==0 */ + cmpwi cr0,r8,0x7ff /* if (fpa.exp == DEXPMAX) */ + crand cr6_inf,cr6_zero,cr0_2 /* fpa.inf=(fpa.exp==DEXPMAX && fpa==0) */ + crandc cr6_NaN,cr0_2,cr6_zero /* fpa.NaN=(fpa.exp==DEXPMAX && fpa!=0) */ + cmpwi cr0,r8,0 /* if (fpa.exp == 0) */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */ + crandc cr0_2,cr0_2,cr6_zero /* if (fpa.exp==0 && fpa!=0) */ + cmpwi cr1,r11,0x7ff /* if (fpb.exp == DEXPMAX) */ + beq denormal_a /* { Add implied 1 to significand */ + oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */ + b adone /* } else { */ +denormal_a: + addi r8,r8,1 /* fpa.exp++; */ +adone: /* } */ + crand cr7_inf,cr7_zero,cr1_2 /* fpb.inf=(fpb.exp==DEXPMAX && fpb==0) */ + crandc cr7_NaN,cr1_2,cr7_zero /* fpb.NaN=(fpb.exp==DEXPMAX && fpb!=0) */ + cmpwi cr0,r11,0 /* if (fpb.exp == 0) */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */ + crandc cr0_2,cr0_2,cr7_zero /* if (fpb.exp==0 && fpb!=0) */ + beq denormal_b /* { Add implied 1 to significand */ + oris r12,r12,0x0010 /* fpb.hi |= 0x00100000; */ + b bdone /* } else { */ +denormal_b: + addi r11,r11,1 /* fpb.exp++; */ +bdone: /* } */ + +/* if (fpa == NaN) return QNaN; */ + bt cr6_NaN,a_NaN /* if (fpa == NaN) goto a_NaN; */ + +/* if (fpb == NaN) return QNaN; */ + bt cr7_NaN,b_NaN /* if (fpb == NaN) goto b_NaN; */ + +/* if (fpa == 0) return fpb; */ + bt cr6_zero,a_zero /* if (fpa == 0) goto a_zero; */ + +/* if (fpb == 0) return fpa; */ + bt cr7_zero,b_zero /* if (fpb == 0) goto b_zero; */ + +/* check for infinities */ + bt cr6_inf,a_INF /* if (fpa.INF) goto a_INF; */ + bt cr7_inf,b_INF 
/* if (fpb.INF) goto b_INF; */ + +/* Multiply a-low and b-low (hi-result) */ + mulhwu r4,r10,r13 /* r4,r11 = fpa.lo * fpb.lo; */ + +/* Calculate exponent */ + add r8,r8,r11 /* fpa.exp += fpb.exp; */ + addi r8,r8,-1023 /* fpa.exp -= DEXPBIAS; */ + +/* Calculate sign */ + crxor cr6_sign,cr6_sign,cr7_sign + + mullw r11,r10,r13 /* r4,r11 = fpa.lo * fpb.lo; */ + +/* r5,r6,r0 = r4,r11 >> 52 */ + rlwinm r5,r4,32-20,0x00000fff /* n[1] = r4 >> 20; */ + rlwinm r6,r4,12,0xfffff000 /* n[2] = r4 << 12; */ + rlwimi r6,r11,32-20,0x00000fff /* n[2] += ((r11>>20)&0xfff); */ + rlwinm r0,r11,12,0xfffff000 /* n[3] = r11 << 12; */ + +/* Multiply a-high and b-low */ + mulhwu r4,r9,r13 + mullw r11,r9,r13 + +/* r29,r30,r31 = r4,r11 >> 20 */ + rlwinm r29,r4,32-20,0x00000fff /* n[0] = r4 >> 20 */ + rlwinm r30,r4,12,0xfffff000 /* n[1] = r4 << 12 */ + rlwimi r30,r11,32-20,0xfff /* n[1] += (r11>>20)&0xfff */ + rlwinm r31,r11,12,0xfffff000 /* n[2] = r11 << 12 */ + +/* r4,r5,r6 += r29,r30,r31 */ + addc r6,r6,r31 + adde r5,r5,r30 + addze r4,r29 + +/* Multiply a-low and b-high */ + mulhwu r13,r10,r12 + mullw r11,r10,r12 + +/* r29,r30,r31 = r13,r11 >> 20 */ + rlwinm r29,r13,32-20,0xfff /* n[0] = r13 >> 20 */ + rlwinm r30,r13,12,0xfffff000 /* n[1] = r13 << 12 */ + rlwimi r30,r11,32-20,0xfff /* n[1] += (r11>>20)&0xfff */ + rlwinm r31,r11,12,0xfffff000 /* n[2] = r11 << 12 */ + +/* r4,r5,r6 += r29,r30,r31 */ + addc r6,r6,r31 + adde r5,r5,r30 + adde r4,r4,r29 + +/* Multiply a-high and b-high */ + mulhwu r13,r9,r12 + mullw r11,r9,r12 + +/* r29,r30,r31 = r13,r11 << 12 */ + rlwinm r29,r13,12,0xfffff000 /* n[0] = r13 << 12 */ + rlwimi r29,r11,32-20,0xfff /* n[0] += ((r11>>20)&0xfff) */ + rlwinm r30,r11,12,0xfffff000 /* n[1] = r9 << 12 */ + +/* r4,r5,r6 += r29,r30,r31 */ + addc r5,r5,r30 + adde r4,r4,r29 + +/* put results from r4,r5,r6,r0 into r9 and r10 */ + cntlzw r13,r4 /* s = cntlz(n[0]); */ + cmpwi cr0,r13,11 /* if (s == 11) */ + RESTREG(29) /* restore r29 */ + RESTREG(30) /* restore r30 */ + bne 
notjust /* { */ + cmpwi cr0,r0,0 /* sticky bit? */ + mr r9,r4 /* fpa.hi = n[0]; */ + mr r10,r5 /* fpa.lo = n[1]; */ + mr r0,r6 /* round = n[2]; */ + beq nosticky /* if (sticky) */ + ori r0,r0,0x0001 /* round |= 0x00000001; */ +nosticky: + b noshift /* } */ +notjust: + bgt noshrght /* else if (s < 11) */ + /* { */ + subfic r13,r13,11 /* r13 = 11-s; */ + srw r10,r5,r13 /* fpa.lo = n[1] >> (11-s); */ + subfic r11,r13,32 /* r11 = 21+s; */ + slw r7,r6,r11 /* sticky = shifted out bits */ + or. r0,r0,r7 /* + lo-word */ + srw r0,r6,r13 /* round = n[2] >> (11-s); */ + slw r6,r5,r11 /* temp = n[1] << (21+s); */ + or r0,r0,r6 /* round |= temp; */ + beq nosticky2 + ori r0,r0,0x0001 /* add in sticky bit */ +nosticky2: + slw r6,r4,r11 /* temp = n[0] << (21+s); */ + or r10,r10,r6 /* fpa.lo |= temp; */ + srw r9,r4,r13 /* fpa.hi = n[0] >> (11-s); */ + add r8,r8,r13 /* fpa.exp += (11-s); */ + cmpwi cr0,r8,0x7ff /* if (fpa.exp == DEXPMAX) */ + bne noshift /* { */ + li r9,0 /* fpa.hi = 0; */ + li r10,0 /* fpa.lo = 0; */ + li r0,0 /* round = 0; */ + /* } */ + b noshift /* } */ +noshrght: /* else if (s > 11) */ + beq noshift /* { */ + cmpwi cr0,r13,32 /* if (s < 32) */ + bge gt32 /* { */ + addi r13,r13,-11 /* r13 = s-11; */ + subf r8,r13,r8 /* fpa.exp -= (s-11); */ + slw r9,r4,r13 /* fpa.hi = n[0] << (s-11); */ + subfic r11,r13,32 /* r11 = 43-s; */ + srw r4,r5,r11 /* temp == n[1] >> (43-s); */ + or r9,r9,r4 /* fpa.hi |= temp; */ + slw r10,r5,r13 /* fpa.lo = n[1] << (s-11); */ + srw r5,r6,r11 /* temp = n[2] >> (43-s); */ + or r10,r10,r5 /* fpa.lo |= temp; */ + slw r4,r6,r13 /* r4 = n[2] << (s-11); */ + srw r5,r0,r11 /* r5 = n[3] >> (43-s); */ + slw r0,r0,r13 /* n[3] <<= (s-11); */ + cmpwi cr0,r0,0 /* sticky bit? 
*/ + or r0,r4,r5 /* round = r4 | r5; */ + beq nostk2 /* if (sticky) */ + ori r0,r0,0x0001 /* round |= 0x00000001; */ +nostk2: b noshift /* } */ +gt32: /* else */ + /* { */ + cntlzw r13,r5 /* s = cntlz(n[1]); */ + addi r4,r13,32-11 /* temp = s+(32-11); */ + subf r8,r4,r8 /* fpa.exp -= (s+(32-11)); */ + addic. r13,r13,-11 /* r13 = s-11; */ + ble sh32le /* if (shift > 11) { */ + slw r9,r5,r13 /* fpa.hi = n[1] << (s-11); */ + subfic r11,r13,32 /* r11 = 43-s; */ + srw r4,r6,r11 /* temp = n[2] >> (43-s); */ + or r9,r9,r4 /* fpa.hi |= temp; */ + slw r10,r6,r13 /* fpa.lo = n[2] << (s-11); */ + srw r4,r0,r11 /* temp = n[3] >> (43-s); */ + or r10,r10,r4 /* fpa.lo |= temp; */ + slw r0,r0,r13 /* round <<= (s-11); */ + b noshift /* } else { */ +sh32le: subfic r13,r13,0 /* r13 = 11-s; */ + srw r9,r5,r13 /* fpa.hi = n[1] >> (11-s); */ + subfic r11,r13,32 /* r11 = 32-(11-s); */ + srw r4,r6,r13 /* temp = n[2] >> (11-s); */ + slw r10,r5,r11 /* fpa.lo = n[1] << 32-(11-s); */ + or r10,r10,r4 /* fpa.lo |= temp; */ + srw r0,r0,r13 /* round >>= (11-s); */ + slw r4,r6,r11 /* temp = n[2] >> 32-(11-s); */ + or r0,r0,r4 /* round |= temp; */ + /* } */ + /* } */ +noshift: /* } */ +/* Fix up number in denormalized range */ + cmpwi cr0,r8,1 /* if (fpa.exp < 1) */ + RESTREG(31) /* restore r31 */ + addis r5,0,0x8000 /* r5 = 0x80000000 */ + bge+ normexp /* { */ + subfic r13,r8,1 /* shift = 1-fpa.exp; */ + li r8,0 /* fpa.exp = 0; */ + cmpwi cr0,r13,32 /* if ( shift < 32) */ + cmpwi cr1,r0,0 /* sticky = (round==0); */ + bge lose32 /* { */ + srw r0,r0,r13 /* round >>= shift; */ + subfic r11,r13,32 /* r11 = 32-shift; */ + slw r6,r10,r11 /* temp = fpa.lo << (32-shift); */ + or r0,r0,r6 /* round |= temp; */ + srw r10,r10,r13 /* fpa.lo >>= shift; */ + slw r6,r9,r11 /* temp = fpa.hi << (32-shift); */ + or r10,r10,r6 /* fpa.lo |= temp; */ + srw r9,r9,r13 /* fpa.hi >>= shift; */ +/* Or in bit 11 to detect carry when rounding */ + oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */ + beq cr1,normexp /* if (sticky) 
*/ + ori r0,r0,0x0001 /* round |= 0x00000001; */ + b normexp /* } */ +lose32: /* else */ + /* { */ + addi r13,r13,-32 /* shift -= 32; */ + cmpwi cr0,r13,32 /* if (shift < 32) */ + bge lose64 /* { */ + or. r6,r0,r10 /* sticky = (round!=0&&fpa.lo!=0); */ + srw r0,r10,r13 /* round = fpa.lo >> shift; */ + subfic r11,r13,32 /* r11 = 32-shift; */ + slw r6,r9,r11 /* temp = fpa.hi << (shift-32); */ + or r0,r0,r6 /* round |= temp; */ + srw r10,r9,r13 /* fpa.lo = fpa.hi >> shift; */ + addi r9,0,0 /* fpa.hi = 0; */ + beq normexp /* if (sticky) */ + ori r0,r0,0x0001 /* round |= 0x00000001; */ + b normexp /* } */ +lose64: /* else */ + /* { */ + addi r9,0,0 /* fpa.hi = 0; */ + addi r10,0,0 /* fpa.lo = 0; */ + addi r0,0,0 /* round = 0; */ + /* } */ + /* } */ +normexp: /* } */ + cmpwi cr0,r8,2047 /* if (fpa.exp >= DEXPMAX) */ + blt+ roundit /* { Set fpa = INF */ + addi r8,0,2047 /* fpa.exp = DEXPMAX; */ + addi r9,0,0 /* fpa.hi = 0; */ + addi r10,0,0 /* fpa.lo = 0; */ + b noround /* goto noround; */ +roundit: /* } */ +/* round fpa to nearest */ + cmplw cr7,r0,r5 /* if ((round >= 0x80000000) || */ + blt cr7,noround /* */ + andi. r6,r10,0x0001 /* (((fpa.lo & 0x00000001) && */ + crand cr0_2,cr7_2,cr0_2 /* (round == 0x80000000))) */ + beq noround /* { */ + addic. 
r10,r10,1 /* fpa.lo++; */ + /* if (fpa.lo == 0) { */ + addze r9,r9 /* fpa.hi++; */ + addis r6,0,0x20 /* r6 = 0x00200000; */ + cmpw cr0,r9,r6 /* if (fpa.hi == 0x00200000) */ + bne noround /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ + /* } */ +noround: /* } */ +/* fpt = fpa; */ + rlwimi r9,r8,20,0x7ff00000 /* fpa.hi |= fpa.exp << 20; */ + bf cr6_sign,nosign /* if (fpa.sign) { */ + oris r9,r9,0x8000 /* fpa.hi |= 0x80000000; */ +nosign: /* } */ + /* return; */ +return_common: + mr r3,r9 /* *FRT = fpa.hi; */ + mr r4,r10 /* *FRT = fpa.lo; */ + + mfctr r13 /* restore r13 */ + mtcr r28 /* restore cr */ + RESTREG(28) /* restore r28 */ + lwz r1,0(r1) /* previous stack frame */ + lwz r0,4(r1) /* saved link register */ + mtlr r0 /* restore link register */ + + blr + +a_zero: /* fpa == 0; */ + mr r9,r3 /* r9,r10 = *FRA; */ + mr r10,r4 + bt cr7_inf,rtn_NaN /* if (!fpb.inf) goto rtn_NaN; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r9,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + b return_common /* return; */ + +rtn_NaN: + oris r9,r9,0x7ff8 /* fpa = QNaN; */ + rlwinm r9,r9,0,0x7fffffff /* r9 &= 0x7fffffff; */ + b return_common /* return; */ + +b_zero: /* return fpb; */ + mr r9,r5 /* r9,r10 = *FRB; */ + mr r10,r6 + bt cr6_inf,rtn_NaN /* if (!fpa.inf) goto rtn_NaN; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r9,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + b return_common /* return; */ + +a_NaN: /* return QNaN; */ + mr r9,r3 /* r9,r10 = *FRA; */ + mr r10,r4 + oris r9,r9,0x0008 /* FRA->hi |= 0x80000; */ + b return_common /* return; */ + +b_NaN: /* return QNaN; */ + mr r9,r5 /* r9,r10 = *FRB; */ + mr r10,r6 + oris r9,r9,0x0008 /* FRB->hi |= 0x80000; */ + b return_common /* return; */ + +a_INF: /* return INF; */ +b_INF: /* return INF; */ + lis r9,0x7ff0 /* r9 = 0x7ff00000; */ + li r10,0 /* r10 = 0; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = 
fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r9,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + b return_common /* return; */ + +function_epilog(__muldf3) + +#endif + +#ifdef L__mulsf3 + +/* fpopt/ppc_fmuls.S, pl_FPE_common, pl_linux 11/24/03 16:17:35 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: multiply two single floating point values (frt = fpa * fpb) */ +/* Input: r3(fpa), r4(fpb) */ +/* Output: r3(frt) */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__mulsf3) + + mfcr r0 /* save cr */ + +/* isolate exponents */ + rlwinm r8,r3,9,0xff /* isolate exponent of fpa into r8 */ + rlwinm r11,r4,9,0xff /* isolate exponent of fpc into r11 */ + +/* test for normal: */ + cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */ + cmpwi cr1,r8,0 /* if (fpa.exp == 0) */ + cmpwi cr2,r11,0xff /* if (fpb.exp == SEXPMAX) */ + cmpwi cr3,r11,0 /* if (fpb.exp == 0) */ + beq full /* fpa.exp is max */ + beq cr2,full /* fpb.exp is max */ + beq cr1,tst0a /* fpa.exp is 0 */ + beq cr3,tst0b /* fpb.exp is 0 */ + +/* normal case only: add in implied one */ + oris r9,r3,0x0080 /* fpa.hi |= 0x00800000; */ + oris r12,r4,0x0080 /* fpb.hi |= 0x00800000; */ + +/* Calculate exponent */ + add r8,r8,r11 /* result.expt = fpa.exp + fpb.exp; */ + /* ------- note: r11 is now free */ + addic. r8,r8,-126 /* result.exp -= SEXPBIAS */ + /* result.exp += 1 (for multiplying left-just */ + /* numbers if result is left-just) */ + + cmpwi cr2,r8,253 /* if (result.exp >= (SEXPMAX--2) : */ + ble full + bge cr2,full + +/* Calculate sign: */ + xor r11,r3,r4 /* get product sign */ + +/* Multiply the numbers: */ +/* Start by shifting everything left 8 bits, so that the MSW is full: */ + rlwinm r5,r9,8,0,23 /* r5 = fpa */ + + rlwinm r6,r12,8,0,23 /* r6 = fpb */ + /* Multiply a and b (upper half only): */ + mulhwu. r5,r5,r6 /* r5 gets high word of result */ + /* ------- note: r6 is now free */ +/* There are two possibilities for the result: */ +/* Either (bit 0 is 1) or (bit 0 is 0 and bit 1 is 1). */ +/* If bit 0 is 0, then shift left 1 bit to normalize: */ + blt msb_is_bit0 + add r5,r5,r5 /* shift fraction (r5) left 1 to fill msb*/ + addic. r8,r8,-1 /* shifted case: exponent is 1 less */ + /* than non-shifted */ + beq full /* if exponent out of range, needs full handling */ +msb_is_bit0: + +/* See if we need to round. 
(NOTE: the exact .5 case needs more precision */ +/* than what we have calculated; in that case, go to the full handler.) */ +/* 24 bits (bits 0-23 of the result) are significant. The sticky bit */ +/* is bit 24. If it is 0, then no rounding. */ +/* BUT we don't really know the sticky bit for sure yet -- the uncalculated */ +/* low-order 16 bits of the 48-bit product could conceivably cause the low */ +/* bits of our result to round up, changing the state of bit 24. */ +/* The round-up is the result of a carry-out from the low 12 bits, */ +/* so at most it adds 1. But we have already */ +/* shifted left one bit, just above, so by now the carry-out could add */ +/* as much as 2. The addition of a 2-bit number can */ +/* propagate up to bit 24 ONLY if bits 25-30 are all ones: */ + rlwinm. r10,r5,0,25,31 + cmpwi cr2,r10,0x7e /* special case if bits 25-30 all 1's */ + rlwinm r10,r5,0,23,24 /* test LSb and sticky bit */ + bge cr2,full + cmpwi cr3,r10,0x80 /* special rounding if LSb=0 and sticky=1 */ + addic r5,r5,0x80 /* standard rounding = add 1 to sticky */ + bne+ cr3,std_done + beq full /* if LSb=0 and sticky=1 and 22-31 all */ + /* zeros, then special case */ +std_done: + mcrxr cr0 /* if carry out, increment exponent */ + bf cr0_2,nocarry + addi r8,r8,1 +nocarry: + +/* We're done! Just assemble the result back into IEEE format: */ +/* r3 = IEEE result */ +/* Shift right 8 bits to put the MSb into bit 8, but */ +/* mask it off since it's implied: */ + rlwinm r3,r5,32-8,9,31 + +/* Put the exponent into bits 1-8 of the MSW: */ + rlwimi r3,r8,23,1,8 + +/* Insert sign bit: */ + rlwimi r3,r11,0,0,0 + + mtcr r0 /* restore cr */ + blr /* return; */ + +tst0a: + rlwinm. r11,r3,0,1,31 /* FRA = +-0 ? */ + beq return0 + beq cr3,tst0b /* fpb.exp == 0 ? (held in cr3)*/ + b full +/* a is 0: */ +/* Calculate sign: */ +return0: + xor r3,r3,r4 /* get product sign */ + rlwinm r3,r3,0,0,0 /* *FRT = fpa.hi = 0+sign */ + mtcr r0 /* restore cr */ + blr /* return */ + +tst0b: + rlwinm. 
r11,r4,0,1,31 /* FRC = +-0 ? */ + bne full +/* b is 0: */ + b return0 + +/*---------------- end of normal-only case --------------- */ +full: +.fmsful: + mtcr r0 /* restore CR */ + mtctr r0 /* save cr in ctr */ + +/* load fpa into r8,r9 and cr6. load fpb into r11, r12 and cr7 */ + mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */ + mr r12,r4 /* load fpb.exp, fpb.S, fpb.hi */ + rlwinm r8,r9,9,0xff /* isolate exponent of fpa */ + rlwinm r11,r12,9,0xff /* isolate exponent of fpb */ + cmpwi cr6,r9,0 /* set fpa.sign */ + cmpwi cr7,r12,0 /* set fpb.sign */ + rlwinm. r9,r9,0,0x007fffff /* isolate fpa.hi */ + cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = fpa.hi == 0 */ + rlwinm. r12,r12,0,0x007fffff /* isolate fpb.hi */ + cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = fpa.hi == 0 */ + cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */ + crand cr6_inf,cr6_zero,cr0_2 /* fpa.inf=(fpa.exp==SEXPMAX && fpa==0) */ + crandc cr6_NaN,cr0_2,cr6_zero /* fpa.NaN=(fpa.exp==SEXPMAX && fpa!=0) */ + cmpwi cr0,r8,0 /* if (fpa.exp == 0) */ + crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */ + crandc cr0_2,cr0_2,cr6_zero /* if (fpa.exp==0 && fpa!=0) */ + cmpwi cr1,r11,0xff /* if (fpb.exp == SEXPMAX) */ + beq denormal_a /* { Add implied 1 to significand */ + oris r9,r9,0x0080 /* fpa.hi |= 0x00800000; */ + b adone /* } else { */ +denormal_a: + addi r8,r8,1 /* fpa.exp++; */ +adone: /* } */ + crand cr7_inf,cr7_zero,cr1_2 /* fpb.inf=(fpb.exp==SEXPMAX && fpb==0) */ + crandc cr7_NaN,cr1_2,cr7_zero /* fpb.NaN=(fpb.exp==SEXPMAX && fpb!=0) */ + cmpwi cr0,r11,0 /* if (fpb.exp == 0) */ + crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */ + crandc cr0_2,cr0_2,cr7_zero /* if (fpb.exp==0 && fpb!=0) */ + beq denormal_b /* { Add implied 1 to significand */ + oris r12,r12,0x0080 /* fpb.hi |= 0x00800000; */ + b bdone /* } else { */ +denormal_b: + addi r11,r11,1 /* fpb.exp++; */ +bdone: /* } */ + +/* if (fpa == NaN) return QNaN; */ + bt cr6_NaN,a_NaN /* if (fpa == NaN) goto a_NaN; */ + +/* if (fpb == 
NaN) return QNaN; */ + bt cr7_NaN,b_NaN /* if (fpb == NaN) goto b_NaN; */ + +/* if (fpa == 0) return fpb; */ + bt cr6_zero,a_zero /* if (fpa == 0) goto a_zero; */ + +/* if (fpb == 0) return fpa; */ + bt cr7_zero,b_zero /* if (fpb == 0) goto b_zero; */ + +/* check for infinities */ + bt cr6_inf,a_INF /* if (fpa.INF) goto a_INF; */ + bt cr7_inf,b_INF /* if (fpb.INF) goto b_INF; */ + +/* Calculate exponent */ + add r8,r8,r11 /* fpa.exp += fpb.exp; */ + addi r8,r8,-(127) /* fpa.exp -= (SEXPBIAS); */ + +/* Calculate sign */ + crxor cr6_sign,cr6_sign,cr7_sign + +/* Put a and b significands into r11 and r12 (b already in 12) */ + mr r11,r9 + +/* Multiply a and b then shift left 9 to left-justify */ + mulhwu r9,r11,r12 /* r9,r11 = fpa * fpb; */ + mullw r10,r11,r12 /* r9,r11 = fpa * fpb; */ + + rlwinm r9,r9,9,0xfffffe00 + rlwimi r9,r10,9,0x1ff + rlwinm r10,r10,9,0xfffffe00 + +/* Normalize results */ + cntlzw r5,r9 /* s = cntlz(fpa.hi); */ + cmpwi cr0,r5,8 /* if (s < 8) */ + bge noshrght /* { */ + subfic r5,r5,8 /* r5 = 8-s; */ + subfic r11,r5,32 /* r11 = 24+s; */ + srw r10,r10,r5 /* fpa.lo >>= (8-s); */ + slw r6,r9,r11 /* temp = fpa.hi << (21+s); */ + or r10,r10,r6 /* fpa.lo |= temp; */ + srw r9,r9,r5 /* fpa.hi >>= (8-s); */ + add r8,r8,r5 /* fpa.exp += (8-s); */ + cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */ + bne noshift /* { */ + addi r9,0,0 /* fpa.hi = 0; */ + addi r10,0,0 /* fpa.lo = 0; */ + /* } */ + b noshift /* } */ +noshrght: /* else if (s > 8) */ + beq noshift /* { */ + cmpwi cr0,r5,32 /* if (s < 32) */ + bge gt32 /* { */ + addi r5,r5,-8 /* r5 = s-8; */ + subf r8,r5,r8 /* fpa.exp -= (s-8); */ + slw r9,r9,r5 /* fpa.hi <<= (s-8); */ + subfic r11,r5,32 /* r11 = 46-s; */ + srw r4,r10,r11 /* temp == fpa.lo >> (46-s); */ + or r9,r9,r4 /* fpa.hi |= temp; */ + slw r10,r10,r5 /* fpa.lo <<= (s-8); */ + b noshift /* } */ +gt32: /* else */ + /* { */ + cntlzw r5,r10 /* s = cntlz(fpa.lo); */ + addi r4,r5,32-8 /* r4 = s+(32-8); */ + subf r8,r4,r8 /* fpa.exp -= (s+(32-8)); */ 
+ addic. r5,r5,-8 /* r5 = s-11; */ + ble sh32le /* if (shift > 8) { */ + slw r9,r10,r5 /* fpa.hi = (fpa.lo << (s-8)); */ + b noshift /* } else { */ +sh32le: + subfic r5,r5,0 /* r5 = 8-s; */ + srw r9,r10,r5 /* fpa.hi = fpa.lo >> (8-s); */ + subfic r11,r5,32 /* r11 = 32-(8-s); */ + slw r10,r10,r11 /* fpa.lo <<= (32-(8-s)); */ + /* } */ + /* } */ +noshift: /* } */ +/* Fix up number in normalized range */ + cmpwi cr0,r8,1 /* else if(fpa.exp >= 1) */ + blt denormexp /* { */ +/* Check for single overflow */ + cmpwi cr0,r8,255 /* if (fpa.exp >= (SEXPMAX)) */ + blt+ roundit /* { */ + addi r8,0,0xff /* fpa.exp = SEXPMAX; */ + addi r9,0,0 /* fpa.hi = 0; */ + b noround /* goto noround; */ + /* } */ +roundit: + addis r12,0,0x8000 /* */ + cmplw cr1,r10,r12 /* if (round >= 0x80000000) guard set */ + blt cr1,noround /* { */ + andi. r0,r9,0x00000001 /* if (((fpa.lobit )==0) && */ + crand cr0_2,cr0_2,cr1_2 /* (round == 0x80000000)) no sticky or round*/ + beq noround /* goto noround; */ + addi r9,r9,1 /* fpa.hi++; */ + addis r6,0,0x0100 /* */ + cmpw cr0,r9,r6 /* if (fpa.hi == 0x01000000) carry out */ + bne noround /* { */ + addi r8,r8,1 /* fpa.exp++; */ + /* } */ + /* } */ + /* } */ + b noround /* } */ +denormexp: /* else */ + /* { */ + subfic r12,r8,1 /* shift= 1 - fpa.exp; */ + li r8,0 /* fpa.exp = 0; */ + cmpwi cr0,r12,25 /* if ( shift < 25) */ + bge rnd2zero /* { */ + subfic r11,r12,32 /* r11 = 32-shift; */ + slw. r0,r10,r11 /* round = fpa.lo << (32-shift); */ + srw r5,r10,r12 /* r5 = fpa.lo >> shift; */ + slw r6,r9,r11 /* temp = fpa.hi << (32-shift); */ + or r5,r5,r6 /* r5 |= temp; */ + srw r4,r9,r12 /* r4 = fpa.hi >> shift; */ + beq nostk /* if (round) */ + ori r5,r5,0x0001 /* r5 |= 0x00000001; */ +nostk: + addis r6,0,0x8000 /* */ + cmplw cr1,r5,r6 /* if (round >= 0x80000000) guard set */ + blt cr1,chkzro /* { */ + andi. 
r0,r4,0x00000001 /* if ((fpa.lobit == 0) && */ + crand cr0_2,cr0_2,cr1_2 /* (round == 0x80000000)) no sticky or round*/ + beq chkzro /* goto chkzro; */ + addi r4,r4,1 /* fpa.hi++; */ + addis r6,0,0x0080 /* */ + cmpw cr0,r4,r6 /* if (fpa.hi == 0x00800000) carry out */ + bne chkzro /* { */ + addi r8,r8,1 /* fpa.exp++; */ +chkzro: /* } */ + /* } */ + cmpwi cr0,r4,0 /* if (fpa.hi == 0 ) */ + mr r9,r4 + beq rnd2zero /* goto rnd2zero; */ + b noround /* } */ +rnd2zero: /* else */ + /* { */ + addi r8,0,0 /* fpa.exp = 0; */ + addi r9,0,0 /* fpa.hi = 0; */ + /* } */ + /* } */ +/* fpt = fpa; */ +noround: /* } */ + rlwimi r9,r8,23,0x7f800000 /* fpa.hi |= fpa.exp << 23; */ + bf cr6_sign,nosign /* if (fpa.sign) { */ + oris r9,r9,0x8000 /* fpa.hi |= 0x80000000; */ +nosign: /* } */ + mr r3,r9 + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ + +a_zero: /* fpa (r3) == 0; */ + bt cr7_inf,rtn_NaN /* if (fpb.inf) goto rtn_NaN; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ +rtn_NaN: + oris r3,0,0x7fc0 /* fpa = QNaN; */ + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ + +b_zero: /* return fpa; */ + bt cr6_inf,rtn_NaN /* if (fpa.inf) goto rtn_NaN; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /* r0 = cr; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ + +a_NaN: /* return QNaN; */ + oris r3,r3,0x0040 /* FRA->hi |= 0x400000; */ + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ + +b_NaN: /* return QNaN; */ + mr r3,r4 + oris r3,r3,0x0040 /* FRB->hi |= 0x400000; */ + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ + +a_INF: /* return INF; */ +b_INF: /* return INF; */ + addis r3,0,0x7f80 /* FRT = INF; */ + crxor cr0_0,cr6_sign,cr7_sign /* crbit0 = fpa.sign ^ fpb.sign; */ + mfcr r0 /*
r0 = sign of result; */ + rlwimi r3,r0,0,0x80000000 /* insert r0 bit 0 into r8 bit 0 */ + mfctr r0 /* restore cr */ + mtcr r0 + blr /* return; */ + +function_epilog(__mulsf3) + +#endif + +#if defined(L__negdf2) || defined(L__negsf2) + +/* fpopt/ppc_fneg.S, pl_FPE_common, pl_linux 11/24/03 16:17:36 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: negate a single or double floating point number */ +/* Input: r3, (r4 iff double) */ +/* Output: r3, (r4 iff double) */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" + +#ifdef L__negdf2 +function_prolog(__negdf2) +#endif +#ifdef L__negsf2 +function_prolog(__negsf2) +#endif + + xoris r3,r3,0x8000 /* flip sign of first or only arg */ + blr /* and return */ + +#ifdef L__negdf2 +function_epilog(__negdf2) +#endif +#ifdef L__negsf2 +function_epilog(__negsf2) +#endif + +#endif + +#ifdef L__subdf3 + +/* fpopt/ppc_fsub.S, pl_FPE_common, pl_linux 11/24/03 16:17:37 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. 
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: subtract two double floating point numbers. frt = fpa - fpb */ +/* Input: r3,r4(fpa) */ +/* r5,r6(fpb) */ +/* Output: r3,r4(frt) */ +/* Notes: 1. 
No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" + +.extern _ppc_fadd_common + +function_prolog(__subdf3) + + mr r7, r5 /* load B(high) */ + xoris r7, r7, 0x8000 /* flip sign of B */ + b _ppc_fadd_common /* now pretend this is add */ + +function_epilog(__subdf3) + +#endif + +#ifdef L__subsf3 + +/* fpopt/ppc_fsubs.S, pl_FPE_common, pl_linux 11/24/03 16:17:38 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: subtract two single floating point numbers. frt = fpa - fpb */ +/* Input: r3(fpa), r4(fpb) */ +/* Output: r3(frt) */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__subsf3) + +.extern _ppc_fadds_common + +/* save cr in r0 */ + mfcr r0 + +/* load fpa into r8,r9. load fpb into r12 */ + xoris r4,r4,0x8000 /* flip sign bit of fpb */ + mr r9,r3 /* load fpa.S, fpa.exp, fpa.hi */ + mr r12,r4 /* load fpb.S, fpb.exp, fpb.hi */ + rlwinm r8,r9,9,0xff /* isolate exponent of fpa */ + b _ppc_fadds_common /* go to common routine */ + +function_epilog(__subsf3) + +#endif + +#ifdef L__extendsfdf2 + +/* fpopt/ppc_ftod.S, pl_FPE_common, pl_linux 11/24/03 16:17:39 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. 
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: convert single floating point to double */ +/* Input: r3 */ +/* Output: r3,r4 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. 
operation performed according to IEEE754-1985 standard. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__extendsfdf2) + + mfcr r0 /* Use r0 to save CR */ +/* Take single apart */ + rlwinm. r8,r3,9,0xFF /* extract exponent (and set CR0) */ + addi r9,r8,-1 /* make zero look like high value */ + cmpli cr1,r9,0xFE /* r9 >= FE means zero/denorm/INF/NaN */ + addi r9,r8,896 /* +1023 - 127 */ + rlwinm r4,r3,29,0xE0000000 /* fill lower word with 3 lsb's of orig */ + srawi r11,r3,3 /* position sign and fraction for DP */ + bge cr1,unusual /* not a standard normalized number */ + +insert_exp: + rlwimi r11,r9,20,0x7FF00000 /* insert biased exponent */ + +do_store: + mr r3,r11 /* move first word */ + /* second word already in r4 */ + mtcr r0 /* restore condition register */ + blr +/* */ +INF_NaN: + oris r11,r11,0x7FF0 /* mark as INF/NaN */ + rlwinm. r9,r3,0,0x007fffff /* if NaN, */ + beq cr0,do_store + oris r11,r11,0x0008 /* make into QNaN */ + b do_store /* and otherwise same as norm */ +/* */ +/* here we have a INF or NaN, or else a zero or denorm */ +/* */ +unusual: + bne INF_NaN /* if original exp not zero, then INF.. */ +/* */ +/* zero or denorm. */ +/* */ + rlwinm. r10,r3,0,0x7FFFFFFF /* clear sign. 
*/ + cntlzw r5,r10 /* get location of first bit */ + ori r11,r3,0 /* just copy input (assume a zero) */ + beq do_store /* input is a zero - r4 already set */ + /* (assume that this branch is "free") */ +/* */ +/* here for a denorm */ +/* note that this path is penalized, but zeros are considered to be much */ +/* more common, and the zero argument path has been optimized */ +/* */ + addi r7,r5,-11 /* get rotate amount */ +/* Note that the fraction is "pulled up" to its final position in the resulting */ +/* double */ + rlwnm r11,r10,r7,0x000FFFFF /* insert fraction (leading bit in bit 11) */ + rlwnm r4,r10,r7,0xE0000000 /* low order bits to r4 */ +/* The -3 is because the shift count is 3 less than the true value because */ +/* the fractions are in different positions in the source single and target */ +/* double. */ + subfic r9,r7,897-3 /* compute biased exp */ + rlwimi r11,r3,0,0x80000000 /* insert sign bit (Arrgh!) */ + b insert_exp + + +function_epilog(__extendsfdf2) + +#endif + +#ifdef L__fixsfsi + +/* fpopt/ppc_ftoi.S, pl_FPE_common, pl_linux 11/24/03 16:17:40 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: Convert single floating point to 4-byte integer. */ +/* Input: r3 */ +/* Output: r3 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard. */ +/* 3. ISO/ANSI C requires that conversions to integers be rounded */ +/* toward zero (in short, leftover fraction bits are truncated). 
*/ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__fixsfsi) + +/* Save CR in r6 */ + mfcr r6 +/* r8 = exp r9 = hifrac r10 = lofrac */ + rlwinm r9,r3,7,0x7fffff80 /* extract fraction and left justify */ + rlwinm r8,r3,9,0x000000FF /* extract exponent */ +/* exponent test for Infinity or xNAN */ + cmpi cr0,0,r8,255 /* test for Infinity or NAN */ + beq Infinity_or_NAN +/* exponent test for large operand */ + cmpi cr0,0,r8,157 /* test for large operand */ + bgt Large_operand +/* Normal operand processing here */ +/* r9 is frac[0:22] */ + cmpi cr0,0,r8,0 /* test exp for 0 */ + addic. r8,r8,-126 /* exp = exp - 126 */ + beq Zero_exp + addic. r8,r8,-1 /* exp = exp - 127 */ + oris r9,r9,0x4000 /* set implicit 1 */ +Zero_exp: + blt Zeros + subfic r0,r8,31 + or. r0,r0,r0 + mtctr r0 + b ltest +cloop: + rlwinm r9,r9,31,0x7FFFFFFF /* shift right by 1, zero fill */ +ltest: + bdnz cloop + +/* Form final result */ +no_inc: + or. r3,r3,r3 /* set CR for sign bit */ + bge fin_pos +/* sign is negative */ + nand r9,r9,r9 /* ones comp of r9 */ + addi r9,r9,1 /* now we have 2's complement */ +fin_pos: +/* check size of result */ + or. r3,r9,r9 /* check for zero, and set r3 to result */ + beq Zeros + +return_results: /* r3 contains signed integer result */ + mtcr r6 /* restore CR */ + blr + +/* Infinity or a NAN */ +Infinity_or_NAN: + or. r0,r9,r9 /* test rest for zero (infinity test) */ + bne NAN +/* Infinity if here (also, Large operand) */ +Large_operand: + or. 
r3,r3,r3 + blt Neg_infinity +/* form value for converted positive infinity */ + lis r3,0x7FFF + ori r3,r3,0xFFFF + b return_results +Neg_infinity: + lis r3,0x8000 + b return_results +NAN: + lis r3,0x8000 + b return_results +Zeros: + li r3,0 /* Store zero */ + b return_results + +function_epilog(__fixsfsi) + +#endif + +#ifdef L__fixunssfsi + +/* fpopt/ppc_ftoui.S, pl_FPE_common, pl_linux 11/24/03 16:19:34 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: Convert single floating point to 4-byte unsigned integer. */ +/* Input: r3(fpa) */ +/* Output: r3 */ +/* Notes: 1. A stack frame is created, following ABI specification, and */ +/* non-volatile registers are saved and restored on the stack. */ +/* 2. This is not a standard IEEE operation. */ +/* 3. Algorithm (Wtype = signed integer and Wtype_MIN=0x80000000) */ +/* if (fpa >= - (Wtype_MIN) */ +/* return (Wtype) (fpa + Wtype_MIN) - Wtype_MIN; */ +/* return (Wtype) a; */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__fixunssfsi) + +/* For negative numbers, just convert to signed integer */ + or. r3,r3,r3 + blt __fixsfsi /* return directly from there */ + + rlwinm r8,r3,9,0x000000FF /* extract exponent */ +/* For numbers less than maximum positive signed integer, convert normally */ + cmpwi r8, 0x009e + blt __fixsfsi /* return directly form there */ + +/* Create stack frame, just to save link register! 
*/ + mflr r0 /* save link register in caller's stack */ + stw r0,4(r1) + stwu r1,-STACKSIZE(r1) /* set up stack frame to hold saved regs */ + +/* floating add the minimum signed integer value */ + lis r4, 0xCF00 + bl __addsf3 + +/* convert result to signed integer */ + bl __fixsfsi + +/* add minimum signed integer value */ + addis r3,r3,0x8000 + +/* restore link register, and return */ + lwz r1,0(r1) /* previous stack frame */ + lwz r0,4(r1) /* saved link register */ + mtlr r0 /* restore link register */ + blr /* return */ +function_epilog(__fixunssfsi) + +#endif + +#ifdef L__floatsidf + +/* fpopt/ppc_itod.S, pl_FPE_common, pl_linux 11/24/03 16:17:41 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: Convert 4-byte integer to double floating point value. */ +/* Input: r3 */ +/* Output: r3,r4 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__floatsidf) + +/* Save CR in r6 */ + mfcr r6 +/* Handle the 0 case */ + or. r5,r3,r3 + li r4,0 /* init lo-word to 0 in any case */ + bne nonzero /* r3 already 0, so just return */ + mtcr r6 + blr +nonzero: + rlwinm r5,r5,0,0x80000000 /* preserve only the sign bit */ + bge posnumb + neg r3,r3 /* make negative number positive */ +posnumb: + cntlzw r7,r3 /* how many leading zeros? */ + addic. 
r7,r7,-11 /* leave room for exponent */ + + beq noshift + blt rightshift + slw r3,r3,r7 /* more than 11 leading 0's, so left shift */ + b noshift +rightshift: + neg r8,r7 /* r8 = - shift count */ + addi r9,r7,32 /* r9 = 32 - shift count */ + slw r4,r3,r9 + srw r3,r3,r8 +noshift: /* now put result together */ + subfic r7,r7,1043 /* adjust biased exponent by shift count */ + rlwimi r3,r7,20,0x7ff00000 /* put exponent and fraction together */ + or r3,r3,r5 /* add in sign */ + mtcr r6 + blr + + +function_epilog(__floatsidf) + +#endif + +#ifdef L__floatsisf + +/* fpopt/ppc_itof.S, pl_FPE_common, pl_linux 11/24/03 16:17:42 */ +/*----------------------------------------------------------------------------- */ +/* Copyright (c) 2003, IBM Corporation */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* * Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* * Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* * Neither the name of IBM nor the names of its contributors */ +/* may be used to endorse or promote products derived from this */ +/* software without specific prior written permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ +/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */ +/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */ +/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */ +/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/*----------------------------------------------------------------------------- */ +/* */ +/* Function: Convert 4-byte integer to single floating point value. */ +/* Input: r3 */ +/* Output: r3 */ +/* Notes: 1. No stack frame is created for this function, so the following */ +/* registers must be preserved, as required by ABI specification: */ +/* LR, CR0, R1, R2, R13-R31 */ +/* 2. operation performed according to IEEE754-1985 standard with */ +/* rounding mode = nearest even. */ +/* */ +/*----------------------------------------------------------------------------- */ + +#include "ppc4xx.inc" +#include "fpeLib.inc" + +function_prolog(__floatsisf) + +/* Save CR in r6 */ + mfcr r6 +/* Handle the 0 case */ + or. r5, r3, r3 + bne nonzero /* r3 already 0, so just return */ + mtcr r6 + blr +nonzero: + rlwinm r5, r5, 0, 0x80000000 /* preserve only the sign bit */ + bge posnumb + neg r3, r3 /* make negative number positive */ +posnumb: + cntlzw r7, r3 /* how many leading zeros? */ + addic. 
r7, r7, -8 /* leave room for exponent */ + + li r10, 150 /* initial exponent value */ + beq noshift + blt rightshift + slw r3, r3, r7 /* more than 8 leading 0's, so left shift */ + b noshift +rightshift: + neg r8, r7 /* r8 = - shift count */ + addi r9, r7, 32 /* r9 = 32 - shift count */ + slw r4, r3, r9 + srw r3, r3, r8 + /* round to nearest even */ + + lis r8,0x8000 /* r5 = 0x80000000 (guard bit) */ + cmplw cr7,r4,r8 /* if ((round >= 0x80000000) || */ + blt cr7,noshift /* */ + andi. r6,r3,0x0001 /* (((fpa.lo & 0x00000001) && */ + crand cr0_2,cr7_2,cr0_2 /* (round == 0x80000000))) */ + beq noshift /* { */ + addi r3,r3,1 /* fpa.lo++; */ + lis r4,0x0100 /* r6 = 0x01000000; */ + cmpw cr0,r3,r4 /* if (fpa.hi == 0x01000000), overflow */ + bne noshift /* { */ + addi r10,r10,1 /* fpa.exp++; */ + /* } */ + /* } */ + +noshift: /* now put result together */ + subf r7, r7, r10 /* adjust biased exponent by shift count */ + rlwimi r3, r7, 23, 0x7f800000 /* put exponent and fraction together */ + or r3, r3, r5 /* add in sign */ + mtcr r6 + blr + + +function_epilog(__floatsisf) + +#endif diff -urN gcc-3.3.2/gcc/config/rs6000/4xx/ppc4xx.inc gcc-3.3.2tempxav/gcc/config/rs6000/4xx/ppc4xx.inc --- gcc-3.3.2/gcc/config/rs6000/4xx/ppc4xx.inc 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2tempxav/gcc/config/rs6000/4xx/ppc4xx.inc 2004-01-23 14:09:42.000000000 -0500 @@ -0,0 +1,139 @@ +/* include/ppc4xx.inc, pl_common, pl_linux 12/11/03 13:43:26 + *----------------------------------------------------------------------------- + * + * Copyright (C) 2003 IBM Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ppc4xx_inc +#define ppc4xx_inc + + +/*----------------------------------------------------------------------------+ +| Function prolog definition and other GCC/EABI defines. 
++----------------------------------------------------------------------------*/ + +#define r0 %r0 +#define r1 %r1 +#define r2 %r2 +#define r3 %r3 +#define r4 %r4 +#define r5 %r5 +#define r6 %r6 +#define r7 %r7 +#define r8 %r8 +#define r9 %r9 +#define r10 %r10 +#define r11 %r11 +#define r12 %r12 +#define r13 %r13 +#define r14 %r14 +#define r15 %r15 +#define r16 %r16 +#define r17 %r17 +#define r18 %r18 +#define r19 %r19 +#define r20 %r20 +#define r21 %r21 +#define r22 %r22 +#define r23 %r23 +#define r24 %r24 +#define r25 %r25 +#define r26 %r26 +#define r27 %r27 +#define r28 %r28 +#define r29 %r29 +#define r30 %r30 +#define r31 %r31 + +#define cr0 %cr0 +#define cr1 %cr1 +#define cr2 %cr2 +#define cr3 %cr3 +#define cr4 %cr4 +#define cr5 %cr5 +#define cr6 %cr6 +#define cr7 %cr7 + +/* helpful cr bit definitions */ +#define cr0_0 0 +#define cr0_1 1 +#define cr0_2 2 +#define cr0_3 3 + +#define cr1_0 4 +#define cr1_1 5 +#define cr1_2 6 +#define cr1_3 7 + +#define cr2_0 8 +#define cr2_1 9 +#define cr2_2 10 +#define cr2_3 11 + +#define cr3_0 12 +#define cr3_1 13 +#define cr3_2 14 +#define cr3_3 15 + +#define cr4_0 16 +#define cr4_1 17 +#define cr4_2 18 +#define cr4_3 19 + +#define cr5_0 20 +#define cr5_1 21 +#define cr5_2 22 +#define cr5_3 23 + +#define cr6_0 24 +#define cr6_1 25 +#define cr6_2 26 +#define cr6_3 27 + +#define cr7_0 28 +#define cr7_1 29 +#define cr7_2 30 +#define cr7_3 31 + + +#define function_prolog(func_name) .text; \ + .align 2; \ + .global func_name; \ + func_name: +#define function_epilog(func_name) .type func_name,@function; \ + .size func_name,.-func_name + +#define function_call(func_name) bl func_name + +#endif + + diff -urN gcc-3.3.2/gcc/config/rs6000/t-ppc4xx gcc-3.3.2tempxav/gcc/config/rs6000/t-ppc4xx --- gcc-3.3.2/gcc/config/rs6000/t-ppc4xx 1969-12-31 19:00:00.000000000 -0500 +++ gcc-3.3.2tempxav/gcc/config/rs6000/t-ppc4xx 2004-02-02 15:51:50.000000000 -0500 @@ -0,0 +1,4 @@ +LIB1ASMSRC= rs6000/4xx/lib1funcs_4xx.S +LIB1ASMFUNCS= __truncdfsf2 
__fixdfsi __adddf3 __addsf3 __eqdf2 __nedf2 __ledf2 __ltdf2 __gedf2 __gtdf2 __cmpdf2 __eqsf2 __nesf2 __lesf2 __ltsf2 __gesf2 __gtsf2 __cmpsf2 __divdf3 __divsf3 __muldf3 __mulsf3 __negdf2 __negsf2 __subdf3 __subsf3 __extendsfdf2 __fixsfsi __floatsidf __floatsisf +FPBIT = +DPBIT = diff -urN gcc-3.3.2/gcc/config.gcc gcc-3.3.2tempxav/gcc/config.gcc --- gcc-3.3.2/gcc/config.gcc 2003-10-01 15:07:01.000000000 -0400 +++ gcc-3.3.2tempxav/gcc/config.gcc 2004-01-23 14:09:42.000000000 -0500 @@ -2956,10 +2956,15 @@ target_cpu_default2="\\\"$with_cpu\\\"" ;; - x401 | x403 | x405 | xec603e | x801 | x821 | x823 | x860) + x401 | x403 | xec603e | x801 | x821 | x823 | x860) target_cpu_default2="\\\"$with_cpu\\\"" ;; + x405 | x440) + target_cpu_default2="\\\"$with_cpu\\\"" + tmake_file="${tmake_file} rs6000/t-ppc4xx" + ;; + xyes | xno) echo "--with-cpu must be passed a value" 1>&2 exit 1
------ Want more information? See the CrossGCC FAQ, http://www.objsw.com/CrossGCC/ Want to unsubscribe? Send a note to crossgcc-unsubscribe@sources.redhat.com
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |