From 3352b62bebfd1e5c2e9961f481df968ab317d78d Mon Sep 17 00:00:00 2001 From: NetBSD project Date: Thu, 30 Jul 2015 09:50:51 +0000 Subject: [PATCH] StdLib/LibC: Add software floating point library from NetBSD Floating point processing is not supported on ARM for UEFI. In order to support UEFI applications in AppPkg we use this library to provide the required functionality. Changes as compared to the NetBSD version: - Formatting changes (tabs to spaces, DOS line endings etc). - Disable exceptions as described in the float_raise() function. - Disable definition of 'Symbolic Boolean literals' in milieu.h. Source originally from: NetBSD project - Source: http://cvsweb.netbsd.org/bsdweb.cgi/?only_with_tag=MAIN - Licensing and Copyright: http://www.netbsd.org/about/redistribution.html Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Harry Liebel Reviewed-by: Olivier Martin Reviewed-by: Daryl McDaniel git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18116 6f19259b-4bc3-4df7-8a09-765794883524 --- StdLib/Include/Arm/arm-gcc.h | 114 + StdLib/Include/Arm/machine/fenv.h | 55 + StdLib/Include/Arm/machine/ieeefp.h | 58 + StdLib/Include/Arm/milieu.h | 38 + StdLib/Include/Arm/softfloat.h | 316 + StdLib/Include/ieeefp.h | 46 + StdLib/LibC/Softfloat/Arm/__aeabi_dcmpeq.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_dcmpge.c | 35 + StdLib/LibC/Softfloat/Arm/__aeabi_dcmpgt.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_dcmple.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_dcmplt.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_dcmpun.c | 42 + StdLib/LibC/Softfloat/Arm/__aeabi_fcmpeq.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_fcmpge.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_fcmpgt.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_fcmple.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_fcmplt.c | 37 + StdLib/LibC/Softfloat/Arm/__aeabi_fcmpun.c | 42 + StdLib/LibC/Softfloat/Makefile.inc | 42 + StdLib/LibC/Softfloat/README.NetBSD | 8 + StdLib/LibC/Softfloat/README.txt | 39 + 
StdLib/LibC/Softfloat/Softfloat.inf | 65 + StdLib/LibC/Softfloat/bits32/softfloat-macros | 648 ++ StdLib/LibC/Softfloat/bits32/softfloat.c | 2355 +++++++ StdLib/LibC/Softfloat/bits64/softfloat-macros | 745 +++ StdLib/LibC/Softfloat/bits64/softfloat.c | 5602 +++++++++++++++++ StdLib/LibC/Softfloat/eqdf2.c | 38 + StdLib/LibC/Softfloat/eqsf2.c | 38 + StdLib/LibC/Softfloat/eqtf2.c | 40 + StdLib/LibC/Softfloat/fpgetmask.c | 55 + StdLib/LibC/Softfloat/fpgetround.c | 55 + StdLib/LibC/Softfloat/fpgetsticky.c | 55 + StdLib/LibC/Softfloat/fpsetmask.c | 60 + StdLib/LibC/Softfloat/fpsetround.c | 60 + StdLib/LibC/Softfloat/fpsetsticky.c | 60 + StdLib/LibC/Softfloat/gedf2.c | 38 + StdLib/LibC/Softfloat/gesf2.c | 38 + StdLib/LibC/Softfloat/getf2.c | 40 + StdLib/LibC/Softfloat/gexf2.c | 39 + StdLib/LibC/Softfloat/gtdf2.c | 36 + StdLib/LibC/Softfloat/gtsf2.c | 36 + StdLib/LibC/Softfloat/gttf2.c | 40 + StdLib/LibC/Softfloat/gtxf2.c | 39 + StdLib/LibC/Softfloat/ledf2.c | 36 + StdLib/LibC/Softfloat/lesf2.c | 36 + StdLib/LibC/Softfloat/letf2.c | 40 + StdLib/LibC/Softfloat/ltdf2.c | 36 + StdLib/LibC/Softfloat/ltsf2.c | 36 + StdLib/LibC/Softfloat/lttf2.c | 40 + StdLib/LibC/Softfloat/nedf2.c | 36 + StdLib/LibC/Softfloat/negdf2.c | 36 + StdLib/LibC/Softfloat/negsf2.c | 36 + StdLib/LibC/Softfloat/negtf2.c | 41 + StdLib/LibC/Softfloat/negxf2.c | 39 + StdLib/LibC/Softfloat/nesf2.c | 36 + StdLib/LibC/Softfloat/netf2.c | 40 + StdLib/LibC/Softfloat/nexf2.c | 39 + StdLib/LibC/Softfloat/softfloat-for-gcc.h | 242 + StdLib/LibC/Softfloat/softfloat-history.txt | 52 + StdLib/LibC/Softfloat/softfloat-source.txt | 383 ++ StdLib/LibC/Softfloat/softfloat-specialize | 529 ++ StdLib/LibC/Softfloat/softfloat.txt | 372 ++ StdLib/LibC/Softfloat/templates/milieu.h | 48 + .../Softfloat/templates/softfloat-specialize | 464 ++ StdLib/LibC/Softfloat/templates/softfloat.h | 290 + StdLib/LibC/Softfloat/timesoftfloat.c | 2641 ++++++++ StdLib/LibC/Softfloat/timesoftfloat.txt | 149 + StdLib/LibC/Softfloat/unorddf2.c | 
40 + StdLib/LibC/Softfloat/unordsf2.c | 40 + StdLib/LibC/Softfloat/unordtf2.c | 44 + StdLib/StdLib.inc | 1 + 71 files changed, 17172 insertions(+) create mode 100644 StdLib/Include/Arm/arm-gcc.h create mode 100644 StdLib/Include/Arm/machine/fenv.h create mode 100644 StdLib/Include/Arm/machine/ieeefp.h create mode 100644 StdLib/Include/Arm/milieu.h create mode 100644 StdLib/Include/Arm/softfloat.h create mode 100644 StdLib/Include/ieeefp.h create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_dcmpeq.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_dcmpge.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_dcmpgt.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_dcmple.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_dcmplt.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_dcmpun.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_fcmpeq.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_fcmpge.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_fcmpgt.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_fcmple.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_fcmplt.c create mode 100644 StdLib/LibC/Softfloat/Arm/__aeabi_fcmpun.c create mode 100644 StdLib/LibC/Softfloat/Makefile.inc create mode 100644 StdLib/LibC/Softfloat/README.NetBSD create mode 100644 StdLib/LibC/Softfloat/README.txt create mode 100644 StdLib/LibC/Softfloat/Softfloat.inf create mode 100644 StdLib/LibC/Softfloat/bits32/softfloat-macros create mode 100644 StdLib/LibC/Softfloat/bits32/softfloat.c create mode 100644 StdLib/LibC/Softfloat/bits64/softfloat-macros create mode 100644 StdLib/LibC/Softfloat/bits64/softfloat.c create mode 100644 StdLib/LibC/Softfloat/eqdf2.c create mode 100644 StdLib/LibC/Softfloat/eqsf2.c create mode 100644 StdLib/LibC/Softfloat/eqtf2.c create mode 100644 StdLib/LibC/Softfloat/fpgetmask.c create mode 100644 StdLib/LibC/Softfloat/fpgetround.c create mode 100644 StdLib/LibC/Softfloat/fpgetsticky.c create mode 100644 
StdLib/LibC/Softfloat/fpsetmask.c create mode 100644 StdLib/LibC/Softfloat/fpsetround.c create mode 100644 StdLib/LibC/Softfloat/fpsetsticky.c create mode 100644 StdLib/LibC/Softfloat/gedf2.c create mode 100644 StdLib/LibC/Softfloat/gesf2.c create mode 100644 StdLib/LibC/Softfloat/getf2.c create mode 100644 StdLib/LibC/Softfloat/gexf2.c create mode 100644 StdLib/LibC/Softfloat/gtdf2.c create mode 100644 StdLib/LibC/Softfloat/gtsf2.c create mode 100644 StdLib/LibC/Softfloat/gttf2.c create mode 100644 StdLib/LibC/Softfloat/gtxf2.c create mode 100644 StdLib/LibC/Softfloat/ledf2.c create mode 100644 StdLib/LibC/Softfloat/lesf2.c create mode 100644 StdLib/LibC/Softfloat/letf2.c create mode 100644 StdLib/LibC/Softfloat/ltdf2.c create mode 100644 StdLib/LibC/Softfloat/ltsf2.c create mode 100644 StdLib/LibC/Softfloat/lttf2.c create mode 100644 StdLib/LibC/Softfloat/nedf2.c create mode 100644 StdLib/LibC/Softfloat/negdf2.c create mode 100644 StdLib/LibC/Softfloat/negsf2.c create mode 100644 StdLib/LibC/Softfloat/negtf2.c create mode 100644 StdLib/LibC/Softfloat/negxf2.c create mode 100644 StdLib/LibC/Softfloat/nesf2.c create mode 100644 StdLib/LibC/Softfloat/netf2.c create mode 100644 StdLib/LibC/Softfloat/nexf2.c create mode 100644 StdLib/LibC/Softfloat/softfloat-for-gcc.h create mode 100644 StdLib/LibC/Softfloat/softfloat-history.txt create mode 100644 StdLib/LibC/Softfloat/softfloat-source.txt create mode 100644 StdLib/LibC/Softfloat/softfloat-specialize create mode 100644 StdLib/LibC/Softfloat/softfloat.txt create mode 100644 StdLib/LibC/Softfloat/templates/milieu.h create mode 100644 StdLib/LibC/Softfloat/templates/softfloat-specialize create mode 100644 StdLib/LibC/Softfloat/templates/softfloat.h create mode 100644 StdLib/LibC/Softfloat/timesoftfloat.c create mode 100644 StdLib/LibC/Softfloat/timesoftfloat.txt create mode 100644 StdLib/LibC/Softfloat/unorddf2.c create mode 100644 StdLib/LibC/Softfloat/unordsf2.c create mode 100644 StdLib/LibC/Softfloat/unordtf2.c diff 
--git a/StdLib/Include/Arm/arm-gcc.h b/StdLib/Include/Arm/arm-gcc.h new file mode 100644 index 0000000000..8cd4989998 --- /dev/null +++ b/StdLib/Include/Arm/arm-gcc.h @@ -0,0 +1,114 @@ +/** @file + + Copyright (c) 2014, ARM Limited. All rights reserved. + + This program and the accompanying materials + are licensed and made available under the terms and conditions of the BSD License + which accompanies this distribution. The full text of the license may be found at + http://opensource.org/licenses/bsd-license.php + + THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. + +**/ + +/* $NetBSD: arm-gcc.h,v 1.4 2013/01/26 07:08:14 matt Exp $ */ + +/* +------------------------------------------------------------------------------- +One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. +------------------------------------------------------------------------------- +*/ +#ifdef __ARMEB__ +#define BIGENDIAN +#else +#define LITTLEENDIAN +#endif + +/* +------------------------------------------------------------------------------- +The macro `BITS64' can be defined to indicate that 64-bit integer types are +supported by the compiler. +------------------------------------------------------------------------------- +*/ +#define BITS64 + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines the most convenient type that holds +integers of at least as many bits as specified. For example, `uint8' should +be the most convenient type that can hold unsigned integers of as many as +8 bits. The `flag' type must be able to hold either a 0 or 1. For most +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +to the same as `int'. 
+------------------------------------------------------------------------------- +*/ +typedef int flag; +typedef int uint8; +typedef int int8; +typedef int uint16; +typedef int int16; +typedef unsigned int uint32; +typedef signed int int32; +#ifdef BITS64 +typedef unsigned long long int uint64; +typedef signed long long int int64; +#endif + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines a type that holds integers +of _exactly_ the number of bits specified. For instance, for most +implementations of C, `bits16' and `sbits16' should be `typedef'ed to +`unsigned short int' and `signed short int' (or `short int'), respectively. +------------------------------------------------------------------------------- +*/ +typedef unsigned char bits8; +typedef signed char sbits8; +typedef unsigned short int bits16; +typedef signed short int sbits16; +typedef unsigned int bits32; +typedef signed int sbits32; +#ifdef BITS64 +typedef unsigned long long int bits64; +typedef signed long long int sbits64; +#endif + +#ifdef BITS64 +/* +------------------------------------------------------------------------------- +The `LIT64' macro takes as its argument a textual integer literal and +if necessary ``marks'' the literal as having a 64-bit integer type. +For example, the GNU C Compiler (`gcc') requires that 64-bit literals be +appended with the letters `LL' standing for `long long', which is `gcc's +name for the 64-bit integer type. Some compilers may allow `LIT64' to be +defined as the identity macro: `#define LIT64( a ) a'. +------------------------------------------------------------------------------- +*/ +#define LIT64( a ) a##ULL +#endif + +/* +------------------------------------------------------------------------------- +The macro `INLINE' can be used before functions that should be inlined. If +a compiler does not support explicit inlining, this macro should be defined +to be `static'.
+------------------------------------------------------------------------------- +*/ +#define INLINE static inline + +/* +------------------------------------------------------------------------------- +The ARM FPA is odd in that it stores doubles high-order word first, no matter +what the endianness of the CPU. VFP is sane. +------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) +#if defined(__VFP_FP__) || defined(__ARMEB__) +#define FLOAT64_DEMANGLE(a) (a) +#define FLOAT64_MANGLE(a) (a) +#else +#define FLOAT64_DEMANGLE(a) (((a) << 32) | ((a) >> 32)) +#define FLOAT64_MANGLE(a) FLOAT64_DEMANGLE(a) +#endif +#endif diff --git a/StdLib/Include/Arm/machine/fenv.h b/StdLib/Include/Arm/machine/fenv.h new file mode 100644 index 0000000000..cdca48c094 --- /dev/null +++ b/StdLib/Include/Arm/machine/fenv.h @@ -0,0 +1,55 @@ +/* $NetBSD: fenv.h,v 1.2 2014/01/29 00:22:09 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + /* + * Based on ieeefp.h written by J.T. Conklin, Apr 28, 1995 + * Public domain. 
+ */ + +#ifndef _ARM_FENV_H_ +#define _ARM_FENV_H_ + +#ifdef __ARM_PCS_AAPCS64 +/* AArch64 split FPSCR into two registers FPCR and FPSR */ +typedef struct { + unsigned int __fpcr; + unsigned int __fpsr; +} fenv_t; +#else +typedef int fenv_t; /* FPSCR */ +#endif +typedef int fexcept_t; + +#define FE_INVALID 0x01 /* invalid operation exception */ +#define FE_DIVBYZERO 0x02 /* divide-by-zero exception */ +#define FE_OVERFLOW 0x04 /* overflow exception */ +#define FE_UNDERFLOW 0x08 /* underflow exception */ +#define FE_INEXACT 0x10 /* imprecise (loss of precision; "inexact") */ + +#define FE_ALL_EXCEPT 0x1f + +#define FE_TONEAREST 0 /* round to nearest representable number */ +#define FE_UPWARD 1 /* round toward positive infinity */ +#define FE_DOWNWARD 2 /* round toward negative infinity */ +#define FE_TOWARDZERO 3 /* round to zero (truncate) */ + +__BEGIN_DECLS + +/* Default floating-point environment */ +extern const fenv_t __fe_dfl_env; +#define FE_DFL_ENV (&__fe_dfl_env) + +__END_DECLS + +#endif /* _ARM_FENV_H_ */ diff --git a/StdLib/Include/Arm/machine/ieeefp.h b/StdLib/Include/Arm/machine/ieeefp.h new file mode 100644 index 0000000000..c3de5ef726 --- /dev/null +++ b/StdLib/Include/Arm/machine/ieeefp.h @@ -0,0 +1,58 @@ +/* $NetBSD: ieeefp.h,v 1.3 2013/04/23 05:42:23 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Based on ieeefp.h written by J.T. Conklin, Apr 28, 1995 + * Public domain. 
+ */ + +#ifndef _ARM_IEEEFP_H_ +#define _ARM_IEEEFP_H_ + +#include +#include + +#if defined(_NETBSD_SOURCE) || defined(_ISOC99_SOURCE) + +//#include +#include + +#if !defined(_ISOC99_SOURCE) + +/* Exception type (used by fpsetmask() et al.) */ + +typedef int fp_except; + +/* Bit defines for fp_except */ + +#define FP_X_INV FE_INVALID /* invalid operation exception */ +#define FP_X_DZ FE_DIVBYZERO /* divide-by-zero exception */ +#define FP_X_OFL FE_OVERFLOW /* overflow exception */ +#define FP_X_UFL FE_UNDERFLOW /* underflow exception */ +#define FP_X_IMP FE_INEXACT /* imprecise (prec. loss; "inexact") */ + +/* Rounding modes */ + +typedef enum { + FP_RN=FE_TONEAREST, /* round to nearest representable number */ + FP_RP=FE_UPWARD, /* round toward positive infinity */ + FP_RM=FE_DOWNWARD, /* round toward negative infinity */ + FP_RZ=FE_TOWARDZERO /* round to zero (truncate) */ +} fp_rnd; + +#endif /* !_ISOC99_SOURCE */ + +#endif /* _NETBSD_SOURCE || _ISOC99_SOURCE */ + +#endif /* _ARM_IEEEFP_H_ */ diff --git a/StdLib/Include/Arm/milieu.h b/StdLib/Include/Arm/milieu.h new file mode 100644 index 0000000000..8f4ac00076 --- /dev/null +++ b/StdLib/Include/Arm/milieu.h @@ -0,0 +1,38 @@ +/* $NetBSD: milieu.h,v 1.1 2000/12/29 20:13:54 bjh21 Exp $ */ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. 
More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. +------------------------------------------------------------------------------- +*/ +#include "arm-gcc.h" diff --git a/StdLib/Include/Arm/softfloat.h b/StdLib/Include/Arm/softfloat.h new file mode 100644 index 0000000000..d52c5a9f8d --- /dev/null +++ b/StdLib/Include/Arm/softfloat.h @@ -0,0 +1,316 @@ +/* $NetBSD: softfloat.h,v 1.10 2013/04/24 18:04:46 matt Exp $ */ + +/* This is a derivative work. */ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. 
The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. The same applies to +the `FLOAT128' macro and the quadruple-precision format `float128'. +------------------------------------------------------------------------------- +*/ +/* #define FLOATX80 */ +/* #define FLOAT128 */ + +#include +#include + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. 
+------------------------------------------------------------------------------- +*/ +typedef unsigned int float32; +typedef unsigned long long float64; +#ifdef FLOATX80 +typedef struct { + unsigned short high; + unsigned long long low; +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { + unsigned long long high, low; +} float128; +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode. +------------------------------------------------------------------------------- +*/ +#ifndef SOFTFLOAT_FOR_GCC +extern int float_detect_tininess; +#endif +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. +------------------------------------------------------------------------------- +*/ +extern fp_rnd float_rounding_mode; +#define float_round_nearest_even FP_RN +#define float_round_to_zero FP_RZ +#define float_round_down FP_RM +#define float_round_up FP_RP + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. +------------------------------------------------------------------------------- +*/ +extern fp_except float_exception_flags; +extern fp_except float_exception_mask; +enum { + float_flag_inexact = FP_X_IMP, + float_flag_underflow = FP_X_UFL, + float_flag_overflow = FP_X_OFL, + float_flag_divbyzero = FP_X_DZ, + float_flag_invalid = FP_X_INV +}; + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. 
+------------------------------------------------------------------------------- +*/ +void float_raise( fp_except ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int32 ); +float32 uint32_to_float32( uint32 ); +float64 int32_to_float64( int32 ); +float64 uint32_to_float64( uint32 ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( int32 ); +floatx80 uint32_to_floatx80( uint32 ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( int32 ); +float128 uint32_to_float128( uint32 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ +float32 int64_to_float32( long long ); +float64 int64_to_float64( long long ); +#ifdef FLOATX80 +floatx80 int64_to_floatx80( long long ); +#endif +#ifdef FLOAT128 +float128 int64_to_float128( long long ); +#endif +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int float32_to_int32( float32 ); +int float32_to_int32_round_to_zero( float32 ); +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) +unsigned int float32_to_uint32_round_to_zero( float32 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +long long float32_to_int64( float32 ); +long long float32_to_int64_round_to_zero( float32 ); +#endif +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. 
+------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +int float32_eq( float32, float32 ); +int float32_le( float32, float32 ); +int float32_lt( float32, float32 ); +int float32_eq_signaling( float32, float32 ); +int float32_le_quiet( float32, float32 ); +int float32_lt_quiet( float32, float32 ); +#ifndef SOFTFLOAT_FOR_GCC +int float32_is_signaling_nan( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int float64_to_int32( float64 ); +int float64_to_int32_round_to_zero( float64 ); +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) +unsigned int float64_to_uint32_round_to_zero( float64 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +long long float64_to_int64( float64 ); +long long float64_to_int64_round_to_zero( float64 ); +#endif +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. 
+------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +int float64_eq( float64, float64 ); +int float64_le( float64, float64 ); +int float64_lt( float64, float64 ); +int float64_eq_signaling( float64, float64 ); +int float64_le_quiet( float64, float64 ); +int float64_lt_quiet( float64, float64 ); +#ifndef SOFTFLOAT_FOR_GCC +int float64_is_signaling_nan( float64 ); +#endif + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int floatx80_to_int32( floatx80 ); +int floatx80_to_int32_round_to_zero( floatx80 ); +long long floatx80_to_int64( floatx80 ); +long long floatx80_to_int64_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern int floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. 
+------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +int floatx80_eq( floatx80, floatx80 ); +int floatx80_le( floatx80, floatx80 ); +int floatx80_lt( floatx80, floatx80 ); +int floatx80_eq_signaling( floatx80, floatx80 ); +int floatx80_le_quiet( floatx80, floatx80 ); +int floatx80_lt_quiet( floatx80, floatx80 ); +int floatx80_is_signaling_nan( floatx80 ); + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int float128_to_int32( float128 ); +int float128_to_int32_round_to_zero( float128 ); +long long float128_to_int64( float128 ); +long long float128_to_int64_round_to_zero( float128 ); +float32 float128_to_float32( float128 ); +float64 float128_to_float64( float128 ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision operations. 
+------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 ); +float128 float128_add( float128, float128 ); +float128 float128_sub( float128, float128 ); +float128 float128_mul( float128, float128 ); +float128 float128_div( float128, float128 ); +float128 float128_rem( float128, float128 ); +float128 float128_sqrt( float128 ); +int float128_eq( float128, float128 ); +int float128_le( float128, float128 ); +int float128_lt( float128, float128 ); +int float128_eq_signaling( float128, float128 ); +int float128_le_quiet( float128, float128 ); +int float128_lt_quiet( float128, float128 ); +int float128_is_signaling_nan( float128 ); + +#endif diff --git a/StdLib/Include/ieeefp.h b/StdLib/Include/ieeefp.h new file mode 100644 index 0000000000..ee3edf5566 --- /dev/null +++ b/StdLib/Include/ieeefp.h @@ -0,0 +1,46 @@ +/* $NetBSD: ieeefp.h,v 1.9 2011/03/27 05:13:15 mrg Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by J.T. Conklin, Apr 6, 1995 + * Public domain. 
+ */ + +#ifndef _IEEEFP_H_ +#define _IEEEFP_H_ + +#include +#include + +__BEGIN_DECLS +typedef fp_rnd fp_rnd_t; +#ifdef _X86_IEEEFP_H_ /* XXX */ +typedef fp_prec fp_prec_t; +#endif +typedef fp_except fp_except_t; + +fp_rnd_t fpgetround(void); +fp_rnd_t fpsetround(fp_rnd_t); +#ifdef _X86_IEEEFP_H_ /* XXX */ +fp_prec_t fpgetprec(void); +fp_prec_t fpsetprec(fp_prec_t); +#endif +fp_except_t fpgetmask(void); +fp_except_t fpsetmask(fp_except_t); +fp_except_t fpgetsticky(void); +fp_except_t fpsetsticky(fp_except_t); +fp_except_t fpresetsticky(fp_except_t); +__END_DECLS + +#endif /* _IEEEFP_H_ */ diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpeq.c b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpeq.c new file mode 100644 index 0000000000..8bde7a5489 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpeq.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +int __aeabi_dcmpeq(float64, float64); + +int +__aeabi_dcmpeq(float64 a, float64 b) +{ + + return float64_eq(a, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpge.c b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpge.c new file mode 100644 index 0000000000..c153feb6a8 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpge.c @@ -0,0 +1,35 @@ +/* $NetBSD: __aeabi_dcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmpge(float64, float64); + +int +__aeabi_dcmpge(float64 a, float64 b) +{ + + return !float64_lt(a, b) && float64_eq(a, a) && float64_eq(b, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpgt.c b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpgt.c new file mode 100644 index 0000000000..5fb1606697 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpgt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmpgt(float64, float64); + +int +__aeabi_dcmpgt(float64 a, float64 b) +{ + + return !float64_le(a, b) && float64_eq(a, a) && float64_eq(b, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_dcmple.c b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmple.c new file mode 100644 index 0000000000..a8327c5e5b --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmple.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmple(float64, float64); + +int +__aeabi_dcmple(float64 a, float64 b) +{ + + return float64_le(a, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_dcmplt.c b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmplt.c new file mode 100644 index 0000000000..8d0e143cb4 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmplt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmplt(float64, float64); + +int +__aeabi_dcmplt(float64 a, float64 b) +{ + + return float64_lt(a, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpun.c b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpun.c new file mode 100644 index 0000000000..fa91120a6c --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_dcmpun.c @@ -0,0 +1,42 @@ +/* $NetBSD: __aeabi_dcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. 
All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Richard Earnshaw, 2003. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmpun(float64, float64); + +int +__aeabi_dcmpun(float64 a, float64 b) +{ + /* + * The comparison is unordered if either input is a NaN. + * Test for this by comparing each operand with itself. + * We must perform both comparisons to correctly check for + * signalling NaNs. + */ + return !float64_eq(a, a) || !float64_eq(b, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpeq.c b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpeq.c new file mode 100644 index 0000000000..83db09e6a4 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpeq.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. 
This file is in the Public Domain. + */ + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +int __aeabi_fcmpeq(float32, float32); + +int +__aeabi_fcmpeq(float32 a, float32 b) +{ + + return float32_eq(a, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpge.c b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpge.c new file mode 100644 index 0000000000..db59a98822 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpge.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmpge(float32, float32); + +int +__aeabi_fcmpge(float32 a, float32 b) +{ + + return !float32_lt(a, b) && float32_eq(a, a) && float32_eq(b, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpgt.c b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpgt.c new file mode 100644 index 0000000000..6d6dea6088 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpgt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmpgt(float32, float32); + +int +__aeabi_fcmpgt(float32 a, float32 b) +{ + + return !float32_le(a, b) && float32_eq(a, a) && float32_eq(b, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_fcmple.c b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmple.c new file mode 100644 index 0000000000..84c0355e2e --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmple.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmple(float32, float32); + +int +__aeabi_fcmple(float32 a, float32 b) +{ + + return float32_le(a, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_fcmplt.c b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmplt.c new file mode 100644 index 0000000000..a421e8ce21 --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmplt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmplt(float32, float32); + +int +__aeabi_fcmplt(float32 a, float32 b) +{ + + return float32_lt(a, b); +} diff --git a/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpun.c b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpun.c new file mode 100644 index 0000000000..403afba17e --- /dev/null +++ b/StdLib/LibC/Softfloat/Arm/__aeabi_fcmpun.c @@ -0,0 +1,42 @@ +/* $NetBSD: __aeabi_fcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. 
All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Richard Earnshaw, 2003. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmpun(float32, float32); + +int +__aeabi_fcmpun(float32 a, float32 b) +{ + /* + * The comparison is unordered if either input is a NaN. + * Test for this by comparing each operand with itself. + * We must perform both comparisons to correctly check for + * signalling NaNs. 
+ */ + return !float32_eq(a, a) || !float32_eq(b, b); +} diff --git a/StdLib/LibC/Softfloat/Makefile.inc b/StdLib/LibC/Softfloat/Makefile.inc new file mode 100644 index 0000000000..df7a95aaa9 --- /dev/null +++ b/StdLib/LibC/Softfloat/Makefile.inc @@ -0,0 +1,42 @@ +# $NetBSD: Makefile.inc,v 1.17 2014/01/30 19:11:54 matt Exp $ + +SOFTFLOAT_BITS?=64 +.PATH: ${ARCHDIR}/softfloat \ + ${.CURDIR}/softfloat/bits${SOFTFLOAT_BITS} ${.CURDIR}/softfloat + +CPPFLAGS+= -I${ARCHDIR}/softfloat -I${.CURDIR}/softfloat +CPPFLAGS+= -DSOFTFLOAT_FOR_GCC + +SRCS.softfloat= softfloat.c + +SRCS.softfloat+=fpgetround.c fpsetround.c fpgetmask.c fpsetmask.c \ + fpgetsticky.c fpsetsticky.c + +.if !empty(LIBC_MACHINE_ARCH:Mearm*) +SRCS.softfloat+=__aeabi_dcmpeq.c __aeabi_fcmpeq.c +SRCS.softfloat+=__aeabi_dcmpge.c __aeabi_fcmpge.c +SRCS.softfloat+=__aeabi_dcmpgt.c __aeabi_fcmpgt.c +SRCS.softfloat+=__aeabi_dcmple.c __aeabi_fcmple.c +SRCS.softfloat+=__aeabi_dcmplt.c __aeabi_fcmplt.c +SRCS.softfloat+=__aeabi_dcmpun.c __aeabi_fcmpun.c +.else +SRCS.softfloat+=eqsf2.c nesf2.c gtsf2.c gesf2.c ltsf2.c lesf2.c negsf2.c \ + eqdf2.c nedf2.c gtdf2.c gedf2.c ltdf2.c ledf2.c negdf2.c \ + eqtf2.c netf2.c gttf2.c getf2.c lttf2.c letf2.c negtf2.c \ + nexf2.c gtxf2.c gexf2.c negxf2.c \ + unordsf2.c unorddf2.c unordtf2.c +.endif + +SRCS+= ${SRCS.softfloat} + +# XXX +.if defined(HAVE_GCC) && ${HAVE_GCC} >= 45 +.if (${MACHINE_CPU} == "arm") +# See doc/HACKS for more information. +COPTS.softfloat.c+= -Wno-enum-compare +COPTS.softfloat.c+= ${${ACTIVE_CXX} == "gcc":? 
-fno-tree-vrp :} +.elif (${MACHINE_CPU} == "mips" || \ + ${MACHINE_CPU} == "sh3") +COPTS.softfloat.c+= -Wno-enum-compare +.endif +.endif diff --git a/StdLib/LibC/Softfloat/README.NetBSD b/StdLib/LibC/Softfloat/README.NetBSD new file mode 100644 index 0000000000..ec310a0692 --- /dev/null +++ b/StdLib/LibC/Softfloat/README.NetBSD @@ -0,0 +1,8 @@ +$NetBSD: README.NetBSD,v 1.2 2002/05/21 23:51:05 bjh21 Exp $ + +This is a modified version of part of John Hauser's SoftFloat 2a package. +This version has been heavily modified to support its use with GCC to +implement built-in floating-point operations, but compiling +softfloat.c without SOFTFLOAT_FOR_GCC defined should get you the same +results as from the original. + diff --git a/StdLib/LibC/Softfloat/README.txt b/StdLib/LibC/Softfloat/README.txt new file mode 100644 index 0000000000..de6052e468 --- /dev/null +++ b/StdLib/LibC/Softfloat/README.txt @@ -0,0 +1,39 @@ +$NetBSD: README.txt,v 1.1 2000/06/06 08:15:02 bjh21 Exp $ + +Package Overview for SoftFloat Release 2a + +John R. Hauser +1998 December 13 + + +SoftFloat is a software implementation of floating-point that conforms to +the IEC/IEEE Standard for Binary Floating-Point Arithmetic. SoftFloat is +distributed in the form of C source code. Compiling the SoftFloat sources +generates two things: + +-- A SoftFloat object file (typically `softfloat.o') containing the complete + set of IEC/IEEE floating-point routines. + +-- A `timesoftfloat' program for evaluating the speed of the SoftFloat + routines. (The SoftFloat module is linked into this program.) + +The SoftFloat package is documented in four text files: + + softfloat.txt Documentation for using the SoftFloat functions. + softfloat-source.txt Documentation for compiling SoftFloat. + softfloat-history.txt History of major changes to SoftFloat. + timesoftfloat.txt Documentation for using `timesoftfloat'. + +Other files in the package comprise the source code for SoftFloat. 
+ +Please be aware that some work is involved in porting this software to other +targets. It is not just a matter of getting `make' to complete without +error messages. I would have written the code that way if I could, but +there are fundamental differences between systems that I can't make go away. +You should not attempt to compile SoftFloat without first reading both +`softfloat.txt' and `softfloat-source.txt'. + +At the time of this writing, the most up-to-date information about +SoftFloat and the latest release can be found at the Web page `http:// +HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'. + diff --git a/StdLib/LibC/Softfloat/Softfloat.inf b/StdLib/LibC/Softfloat/Softfloat.inf new file mode 100644 index 0000000000..012190c257 --- /dev/null +++ b/StdLib/LibC/Softfloat/Softfloat.inf @@ -0,0 +1,65 @@ +## @file +# Standard C library: Software floating point Library. +# +# Copyright (c) 2014, ARM Ltd. All rights reserved. +# +# This program and the accompanying materials +# are licensed and made available under the terms and conditions of the BSD License +# which accompanies this distribution. The full text of the license may be found at +# http://opensource.org/licenses/bsd-license.php. +# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 
+# +# +## + +[Defines] + INF_VERSION = 0x00010005 + BASE_NAME = LibSoftfloat + FILE_GUID = e9f4b929-ee33-4b70-8e90-17d283af508c + MODULE_TYPE = UEFI_APPLICATION + VERSION_STRING = 1.0 + LIBRARY_CLASS = LibSoftfloat + +# +# VALID_ARCHITECTURES = ARM +# + +# Only tested with GCC +# Look at the Makefile.inc file from NetBSD to see how to build + +[Sources.ARM] + bits32/softfloat.c + Arm/__aeabi_dcmpeq.c + Arm/__aeabi_fcmpeq.c + Arm/__aeabi_dcmpge.c + Arm/__aeabi_fcmpge.c + Arm/__aeabi_dcmpgt.c + Arm/__aeabi_fcmpgt.c + Arm/__aeabi_dcmple.c + Arm/__aeabi_fcmple.c + Arm/__aeabi_dcmplt.c + Arm/__aeabi_fcmplt.c + Arm/__aeabi_dcmpun.c + Arm/__aeabi_fcmpun.c + +[Sources] + fpgetround.c + fpsetround.c + fpgetmask.c + fpsetmask.c + fpgetsticky.c + fpsetsticky.c + +[Packages] + StdLib/StdLib.dec + StdLibPrivateInternalFiles/DoNotUse.dec + MdePkg/MdePkg.dec + +################################################################ +# The Build Options, below, are only used when building the C library. +# DO NOT use them when building your application! +# Nasty things could happen if you do. + +[BuildOptions] + GCC:*_*_*_CC_FLAGS = -DSOFTFLOAT_FOR_GCC -Wno-enum-compare -fno-tree-vrp diff --git a/StdLib/LibC/Softfloat/bits32/softfloat-macros b/StdLib/LibC/Softfloat/bits32/softfloat-macros new file mode 100644 index 0000000000..8e1f2d8b9a --- /dev/null +++ b/StdLib/LibC/Softfloat/bits32/softfloat-macros @@ -0,0 +1,648 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. 
The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Shifts `a' right by the number of bits given in `count'. If any nonzero +bits are shifted off, they are ``jammed'' into the least significant bit of +the result by setting the least significant bit to 1. The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 32, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +{ + bits32 z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 32 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' can be arbitrarily large; in particular, if `count' is greater +than 64, the result will be 0. The result is broken into two 32-bit pieces +which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift64Right( + bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits32 z0, z1; + int8 negCount = ( - count ) & 31; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 32 ) { + z1 = ( a0<>count ); + z0 = a0>>count; + } + else { + z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. If any nonzero bits are shifted off, they +are ``jammed'' into the least significant bit of the result by setting the +least significant bit to 1. The value of `count' can be arbitrarily large; +in particular, if `count' is greater than 64, the result will be either 0 +or 1, depending on whether the concatenation of `a0' and `a1' is zero or +nonzero. The result is broken into two 32-bit pieces which are stored at +the locations pointed to by `z0Ptr' and `z1Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + shift64RightJamming( + bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits32 z0, z1; + int8 negCount = ( - count ) & 31; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 32 ) { + z1 = ( a0<>count ) | ( ( a1<>count; + } + else { + if ( count == 32 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 64 ) { + z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<>count ); + z0 = a0>>count; + } + else { + if ( count == 32 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 64 ) { + z2 = a0<>( count & 31 ); + } + else { + z2 = ( count == 64 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' must be less than 32. The result is broken into two 32-bit +pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shortShift64Left( + bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + + *z1Ptr = a1<>( ( - count ) & 31 ) ); + +} + +/* +------------------------------------------------------------------------------- +Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left +by the number of bits given in `count'. Any bits shifted off are lost. +The value of `count' must be less than 32. The result is broken into three +32-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + shortShift96Left( + bits32 a0, + bits32 a1, + bits32 a2, + int16 count, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 negCount; + + z2 = a2<>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit +value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so +any carry out is lost. The result is broken into two 32-bit pieces which +are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + add64( + bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits32 z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/* +------------------------------------------------------------------------------- +Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the +96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +modulo 2^96, so any carry out is lost. The result is broken into three +32-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + add96( + bits32 a0, + bits32 a1, + bits32 a2, + bits32 b0, + bits32 b1, + bits32 b2, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < (bits32)carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the +64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +2^64, so any borrow out (carry out) is lost. The result is broken into two +32-bit pieces which are stored at the locations pointed to by `z0Ptr' and +`z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + sub64( + bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from +the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction +is modulo 2^96, so any borrow out (carry out) is lost. The result is broken +into three 32-bit pieces which are stored at the locations pointed to by +`z0Ptr', `z1Ptr', and `z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + sub96( + bits32 a0, + bits32 a1, + bits32 a2, + bits32 b0, + bits32 b1, + bits32 b2, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < (bits32)borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies `a' by `b' to obtain a 64-bit product. The product is broken +into two 32-bit pieces which are stored at the locations pointed to by +`z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits16 aHigh, aLow, bHigh, bLow; + bits32 z0, zMiddleA, zMiddleB, z1; + + aLow = a; + aHigh = a>>16; + bLow = b; + bHigh = b>>16; + z1 = ( (bits32) aLow ) * bLow; + zMiddleA = ( (bits32) aLow ) * bHigh; + zMiddleB = ( (bits32) aHigh ) * bLow; + z0 = ( (bits32) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 ); + zMiddleA <<= 16; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' +to obtain a 96-bit product. The product is broken into three 32-bit pieces +which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +`z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + mul64By32To96( + bits32 a0, + bits32 a1, + bits32 b, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2, more1; + + mul32To64( a1, b, &z1, &z2 ); + mul32To64( a0, b, &z0, &more1 ); + add64( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the +64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit +product. The product is broken into four 32-bit pieces which are stored at +the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + mul64To128( + bits32 a0, + bits32 a1, + bits32 b0, + bits32 b1, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr, + bits32 *z3Ptr + ) +{ + bits32 z0, z1, z2, z3; + bits32 more1, more2; + + mul32To64( a1, b1, &z2, &z3 ); + mul32To64( a1, b0, &z1, &more2 ); + add64( z1, more2, 0, z2, &z1, &z2 ); + mul32To64( a0, b0, &z0, &more1 ); + add64( z0, more1, 0, z1, &z0, &z1 ); + mul32To64( a0, b1, &more1, &more2 ); + add64( more1, more2, 0, z2, &more1, &z2 ); + add64( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the 32-bit integer quotient obtained by dividing +`b' into the 64-bit value formed by concatenating `a0' and `a1'. The +divisor `b' must be at least 2^31. If q is the exact quotient truncated +toward zero, the approximation returned lies between q and q + 2 inclusive. +If the exact quotient q is larger than 32 bits, the maximum positive 32-bit +unsigned integer is returned. 
+------------------------------------------------------------------------------- +*/ +static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) +{ + bits32 b0, b1; + bits32 rem0, rem1, term0, term1; + bits32 z; + + if ( b <= a0 ) return 0xFFFFFFFF; /* the saturated result described in the header comment */ + b0 = b>>16; + z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16; /* estimate of the upper 16 quotient bits, using only the upper 16 bits of b */ + mul32To64( b, z, &term0, &term1 ); + sub64( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits32) rem0 ) < 0 ) { /* negative remainder: the estimate was too large */ + z -= 0x10000; + b1 = b<<16; + add64( rem0, rem1, b0, b1, &rem0, &rem1 ); /* b0:b1 is b shifted left by 16, matching the 0x10000 step taken off z */ + } + rem0 = ( rem0<<16 ) | ( rem1>>16 ); + z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0; /* lower 16 bits estimated the same way, with no correction loop */ + return z; + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns an approximation to the square root of the 32-bit significand given +by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +`aExp' (the least significant bit) is 1, the integer returned approximates +2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +case, the approximation returned lies strictly within +/-2 of the exact +value. +------------------------------------------------------------------------------- +*/ +static bits32 estimateSqrt32( int16 aExp, bits32 a ) +{ + static const bits16 sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const bits16 sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8 index; + bits32 z; + + index = ( a>>27 ) & 15; /* bits 27..30 of a select one of the 16 table entries */ + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 
0xFFFF8000 : ( z<<15 ); /* clamp so that z<<15 cannot exceed 32 bits */ + if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); /* also keeps estimateDiv64To32 below off its saturating b <= a0 path */ + } + return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit of +`a'. If `a' is zero, 32 is returned. +------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { /* entry i is the number of leading zero bits of i viewed as an 8-bit value */ + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { /* upper 16 bits are all zero */ + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { /* top byte is zero */ + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; /* finish with a table lookup on the top byte */ + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is +equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. 
+------------------------------------------------------------------------------- +*/ +INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 64-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); /* a0/b0 decide; a1/b1 only break a tie */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less +than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); /* a0/b0 decide; a1/b1 only break a tie */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not +equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/StdLib/LibC/Softfloat/bits32/softfloat.c b/StdLib/LibC/Softfloat/bits32/softfloat.c new file mode 100644 index 0000000000..a513bf94e1 --- /dev/null +++ b/StdLib/LibC/Softfloat/bits32/softfloat.c @@ -0,0 +1,2355 @@ +/* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */ + +/* + * This version hacked for use with gcc -msoft-float by bjh21. 
+ * (Mostly a case of #ifdefing out things GCC doesn't need or provides + * itself). + */ + +/* + * Things you may want to define: + * + * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with + * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them + * properly renamed. + */ + +/* + * This differs from the standard bits32/softfloat.c in that float64 + * is defined to be a 64-bit integer rather than a structure. The + * structure is float64s, with translation between the two going via + * float64u. + */ + +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-Point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. 
+ +=============================================================================== +*/ + +#include /* NOTE(review): no header name follows this #include; it looks like angle-bracketed text was stripped in extraction -- restore it from upstream */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif + +#include "milieu.h" +#include "softfloat.h" + +/* + * Conversions between floats as stored in memory and floats as + * SoftFloat uses them. Both macros default to the identity when the + * including environment has not already defined them. + */ +#ifndef FLOAT64_DEMANGLE +#define FLOAT64_DEMANGLE(a) (a) +#endif +#ifndef FLOAT64_MANGLE +#define FLOAT64_MANGLE(a) (a) +#endif + +/* +------------------------------------------------------------------------------- +Floating-point rounding mode and exception flags. The two variables are +defined here only when `set_float_rounding_mode' is not already a macro. +------------------------------------------------------------------------------- +*/ +#ifndef set_float_rounding_mode +fp_rnd float_rounding_mode = float_round_nearest_even; +fp_except float_exception_flags = 0; +#endif +#ifndef set_float_exception_inexact_flag +#define set_float_exception_inexact_flag() \ + ((void)(float_exception_flags |= float_flag_inexact)) +#endif + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) +------------------------------------------------------------------------------- +*/ +#include "softfloat-macros" + +/* +------------------------------------------------------------------------------- +Functions and definitions to determine: (1) whether tininess for underflow +is detected before or after rounding by default, (2) what (if anything) +happens when exceptions are raised, (3) how signaling NaNs are distinguished +from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +are propagated from function inputs to output. These details are target- +specific. 
+------------------------------------------------------------------------------- +*/ +#include "softfloat-specialize" + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return a & 0x007FFFFF; /* low 23 bits */ + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( a>>23 ) & 0xFF; /* the 8 bits above the fraction (bits 23..30) */ + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat32Sign( float32 a ) +{ + + return a>>31; /* bit 31, assuming float32 (declared elsewhere) is an unsigned 32-bit type */ + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal single-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. 
+------------------------------------------------------------------------------- +*/ +static void + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( aSig ) - 8; + *zSigPtr = aSig<>7; /* NOTE(review): text is missing here. `aSig<>7' is not valid C; everything between a `<' and a later `>' appears to have been stripped in extraction, taking the rest of this function and the start of the routine whose tail follows. Restore from upstream before applying. */ + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat32' except that `zSig' does not have to be normalized. +Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +floating-point exponent. +------------------------------------------------------------------------------- +*/ +static float32 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( zSig ) - 1; + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<> 32) & 0x000FFFFF); /* NOTE(review): text is missing here as well. `zSig<> 32' is not valid C; the end of this call and the start of the function this return statement belongs to were lost. Restore from upstream. */ + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat64Exp( float64 a ) +{ + + return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF); /* the 11 bits starting at bit 52 */ + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the double-precision floating-point value `a'. 
+------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat64Sign( float64 a ) +{ + + return (flag)(FLOAT64_DEMANGLE(a) >> 63); /* bit 63 */ + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal double-precision floating-point value represented +by the denormalized significand formed by the concatenation of `aSig0' and +`aSig1'. The normalized exponent is stored at the location pointed to by +`zExpPtr'. The most significant 21 bits of the normalized significand are +stored at the location pointed to by `zSig0Ptr', and the least significant +32 bits of the normalized significand are stored at the location pointed to +by `zSig1Ptr'. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat64Subnormal( + bits32 aSig0, + bits32 aSig1, + int16 *zExpPtr, + bits32 *zSig0Ptr, + bits32 *zSig1Ptr + ) +{ + int8 shiftCount; + + if ( aSig0 == 0 ) { + shiftCount = countLeadingZeros32( aSig1 ) - 11; + if ( shiftCount < 0 ) { + *zSig0Ptr = aSig1>>( - shiftCount ); + *zSig1Ptr = aSig1<<( shiftCount & 31 ); + } + else { + *zSig0Ptr = aSig1<>( - shiftCount ); /* NOTE(review): text is missing here. `aSig1<>( - shiftCount )' is not valid C; everything between a `<' and a later `>' appears to have been stripped in extraction, taking the rest of this function and the start of the routine whose tail follows (z, aSigExtra, aSign and roundingMode are declared in the lost text). Restore from upstream before applying. */ + } + if ( aSigExtra ) set_float_exception_inexact_flag(); + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( (sbits32) aSigExtra < 0 ) { + ++z; + if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; + } + if ( aSign ) z = - z; + } + else { + aSigExtra = ( aSigExtra != 0 ); + if ( aSign ) { + z += ( roundingMode == float_round_down ) & aSigExtra; + z = - z; + } + else { + z += ( roundingMode == float_round_up ) & aSigExtra; + } + } + } + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. 
The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. +If `a' is a NaN, the largest positive integer is returned. Otherwise, if +the conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; /* 0x9E is 0x7F + 31, the biased exponent of 2^31 */ + if ( 0 <= shiftCount ) { /* magnitude is at least 2^31, or a is infinite or NaN */ + if ( a != 0xCF000000 ) { /* 0xCF000000 is sign 1, exponent 0x9E, fraction 0, i.e. -2^31: the only value here that still fits */ + float_raise( float_flag_invalid ); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + } + return (sbits32) 0x80000000; + } + else if ( aExp <= 0x7E ) { /* magnitude below 1 truncates to 0 */ + if ( aExp | aSig ) set_float_exception_inexact_flag(); /* inexact unless a is a zero */ + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; /* add the implicit leading 1 and left-justify in 32 bits */ + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { /* any bits shifted out above were nonzero */ + set_float_exception_inexact_flag(); + } + if ( aSign ) z = - z; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float32_to_float64( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig, zSig0, zSig1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); + return packFloat64( aSign, 0x7FF, 0, 0 ); /* infinity keeps its sign */ + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); /* signed zero */ + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); /* shift64Right is defined outside this view; presumably shifts the pair aSig:0 right by 3 bits */ + return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); /* 0x380 == 0x3FF - 0x7F */ + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Rounds the single-precision floating-point value `a' to an integer, +and returns the result as a single-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + float32 z; + + aExp = extractFloat32Exp( a ); + if ( 0x96 <= aExp ) { /* 0x96 - 0x7F == 23, the width of the fraction field: a is returned as-is unless it is a NaN */ + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { + return propagateFloat32NaN( a, a ); + } + return a; + } + if ( aExp <= 0x7E ) { /* magnitude below 1 */ + if ( (bits32) ( a<<1 ) == 0 ) return a; /* +0 or -0 */ + set_float_exception_inexact_flag(); + aSign = extractFloat32Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { + return packFloat32( aSign, 0x7F, 0 ); + } + break; + case float_round_to_zero: + break; + case float_round_down: + return aSign ? 0xBF800000 : 0; /* 0xBF800000 encodes -1.0 */ + case float_round_up: + return aSign ? 
0x80000000 : 0x3F800000; /* 0x80000000 encodes -0, 0x3F800000 encodes +1.0 */ + } + return packFloat32( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; /* bit position selected by the exponent */ + roundBitsMask = lastBitMask - 1; /* all bits below lastBitMask */ + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; /* exact tie: clear the last kept bit so the result is even */ + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) set_float_exception_inexact_flag(); + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the single-precision +floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. +The addition is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; /* implicit leading 1: 0x00800000<<6 */ + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if ( aSig | bSig ) 
return propagateFloat32NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); /* both exponent fields are zero: add the fractions directly, undoing the <<6 */ + zSig = 0x40000000 + aSig + bSig; /* 0x40000000 is both implicit leading 1s (2 * 0x20000000) */ + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { /* bit 31 set after the shift: undo the shift and the exponent decrement */ + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the single- +precision floating-point values `a' and `b'. If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); /* both operands are infinities */ + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); /* equal magnitudes: zero result, sign bit set only in round-down mode */ + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; /* implicit leading 1: 0x00800000<<7 */ + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; /* b has the larger magnitude, so the sign flips */ + goto 
normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the single-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_add( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { /* like signs add magnitudes; unlike signs subtract them */ + return addFloat32Sigs( a, b, aSign ); + } + else { + return subFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sub( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { /* like signs subtract magnitudes; unlike signs add them */ + return subFloat32Sigs( a, b, aSign ); + } + else { + return addFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_mul( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { /* infinity times zero */ + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { /* zero times infinity */ + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + mul32To64( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); /* fold any nonzero bits of the second output word into bit 0 as a sticky bit */ + if ( 0 <= (sbits32) ( zSig0<<1 ) ) { /* bit 30 is clear: shift left one place and lower the exponent to match */ + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the single-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_div( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); /* infinity divided by infinity */ + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0, 0 ); /* finite divided by infinity gives a signed zero */ + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { /* zero divided by zero */ + float_raise( float_flag_invalid ); + return float32_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { /* halve aSig (raising the exponent to match) so it stays below bSig for estimateDiv64To32 */ + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv64To32( aSig, 0, bSig ); + if ( ( zSig & 0x3F ) <= 2 ) { /* the estimate is refined only when its low 6 bits are 0, 1 or 2 */ + mul32To64( bSig, zSig, &term0, &term1 ); + sub64( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { /* negative remainder: the estimate was too large */ + --zSig; + add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); /* sticky bit for a nonzero remainder */ + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the remainder of the single-precision floating-point value `a' +with respect to the corresponding value 
`b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_rem( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig, q, allZero, alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); /* NOTE(review): bSign and allZero are not read again in this function */ + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + float_raise( float_flag_invalid ); /* a is infinite */ + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return a; /* b is infinite: a is returned unchanged */ + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { /* b is zero */ + float_raise( float_flag_invalid ); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | 0x00800000 )<<8; + bSig = ( bSig | 0x00800000 )<<8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; /* exponent of a is at least 2 below that of b: a is returned unchanged */ + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 32; + while ( 0 < expDiff ) { + q = estimateDiv64To32( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; /* estimateDiv64To32 may exceed the true quotient by up to 2, so step back by 2 without going below 0 */ + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 30; + } + expDiff += 32; + if ( 0 < expDiff ) { + q = estimateDiv64To32( aSig, 0, bSig ); + q = ( 2 < q ) ? 
q - 2 : 0; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { /* subtract bSig until the value goes negative, keeping the last non-negative value in alternateASig */ + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { /* keep whichever candidate is smaller in magnitude; on a tie the low bit of q decides */ + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + +} +#endif + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the square root of the single-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sqrt( float32 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig, rem0, rem1, term0, term1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, 0 ); + if ( ! 
aSign ) return a; /* +infinity is returned unchanged */ + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; /* -0 is returned unchanged */ + float_raise( float_flag_invalid ); /* any other negative input */ + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { /* the estimate is refined only when its low 7 bits are 5 or less */ + if ( zSig < 2 ) { /* the + 2 above wrapped around */ + zSig = 0x7FFFFFFF; + goto roundAndPack; + } + else { + aSig >>= aExp & 1; + mul32To64( zSig, zSig, &term0, &term1 ); + sub64( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { /* zSig squared is too large: lower zSig and add 2*zSig + 1 back to the remainder */ + --zSig; + shortShift64Left( 0, zSig, 1, &term0, &term1 ); + term1 |= 1; + add64( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); /* sticky bit for a nonzero remainder */ + } + } + shift32RightJamming( zSig, 1, &zSig ); + roundAndPack: + return roundAndPackFloat32( 0, zExp, zSig ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_eq( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { /* only signaling NaNs raise invalid here */ + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); /* identical encodings, or both operands are zeros of either sign */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. 
The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+
+Any NaN operand (quiet or signaling) raises the invalid exception and the
+result is 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_le( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );  /* signs differ: true if a is negative or both are zero */
+    return ( a == b ) || ( aSign ^ ( a < b ) );  /* same sign: raw encodings ordered, reversed when negative */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise. The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+Any NaN operand (quiet or signaling) raises the invalid exception and the
+result is 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );  /* signs differ: a negative and not both zero */
+    return ( a != b ) && ( aSign ^ ( a < b ) );  /* same sign: unequal and ordered, reversed when negative */
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise. The invalid exception is
+raised if either operand is a NaN. Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq_signaling( float32 a, float32 b )
+{
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );  /* any NaN, quiet or signaling */
+        return 0;
+    }
+    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );  /* second test: both operands are zeros */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+cause an exception. Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+If either operand is a NaN the result is 0; the invalid exception is raised
+only when one of the operands is a signaling NaN.
+-------------------------------------------------------------------------------
+*/
+flag float32_le_quiet( float32 a, float32 b )
+{
+    flag aSign, bSign;
+    int16 aExp, bExp;  /* NOTE(review): declared but never used in this function */
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );  /* signs differ: true if a is negative or both are zero */
+    return ( a == b ) || ( aSign ^ ( a < b ) );  /* same sign: raw encodings ordered, reversed when negative */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+exception. Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+
+If either operand is a NaN the result is 0; the invalid exception is raised
+only when one of the operands is a signaling NaN.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt_quiet( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );  /* signs differ: a negative and not both zero */
+    return ( a != b ) && ( aSign ^ ( a < b ) );  /* same sign: unequal and ordered, reversed when negative */
+
+}
+#endif /* !SOFTFLOAT_FOR_GCC */
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format. The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode. If `a' is a NaN, the largest
+positive integer is returned. Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+
+On the overflow/NaN path the invalid exception is raised; otherwise the
+inexact flag is set when any discarded bits were nonzero.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig0, aSig1, absZ, aSigExtra;
+    int32 z;
+    int8 roundingMode;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    shiftCount = aExp - 0x413;
+    if ( 0 <= shiftCount ) {
+        if ( 0x41E < aExp ) {
+            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;  /* NaN: clear the sign so 0x7FFFFFFF is returned */
+            goto invalid;
+        }
+        shortShift64Left(
+            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
+        if ( 0x80000000 < absZ ) goto invalid;
+    }
+    else {
+        aSig1 = ( aSig1 != 0 );  /* reduce the low word to a single nonzero indicator */
+        if ( aExp < 0x3FE ) {
+            aSigExtra = aExp | aSig0 | aSig1;
+            absZ = 0;
+        }
+        else {
+            aSig0 |= 0x00100000;
+            aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
+            absZ = aSig0>>( - shiftCount );
+        }
+    }
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        if ( (sbits32) aSigExtra < 0 ) {  /* top bit of the discarded part is set */
+            ++absZ;
+            if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;  /* nothing below the top bit: force an even result */
+        }
+        z = aSign ? - absZ : absZ;
+    }
+    else {
+        aSigExtra = ( aSigExtra != 0 );
+        if ( aSign ) {
+            z = - (   absZ
+                    + ( ( roundingMode == float_round_down ) & aSigExtra ) );
+        }
+        else {
+            z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
+        }
+    }
+    if ( ( aSign ^ ( z < 0 ) ) && z ) {  /* nonzero result whose sign disagrees with aSign */
+ invalid:
+        float_raise( float_flag_invalid );
+        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( aSigExtra ) set_float_exception_inexact_flag();
+    return z;
+
+}
+#endif /* !SOFTFLOAT_FOR_GCC */
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format. The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic, except that the conversion is always rounded toward zero.
+If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+the conversion overflows, the largest integer with the same sign as `a' is
+returned.
+
+On the overflow/NaN path the invalid exception is raised; otherwise the
+inexact flag is set when any discarded bits were nonzero.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32_round_to_zero( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig0, aSig1, absZ, aSigExtra;
+    int32 z;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    shiftCount = aExp - 0x413;
+    if ( 0 <= shiftCount ) {
+        if ( 0x41E < aExp ) {
+            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;  /* NaN: clear the sign so 0x7FFFFFFF is returned */
+            goto invalid;
+        }
+        shortShift64Left(
+            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
+    }
+    else {
+        if ( aExp < 0x3FF ) {  /* result is 0; inexact unless the input is a zero */
+            if ( aExp | aSig0 | aSig1 ) {
+                set_float_exception_inexact_flag();
+            }
+            return 0;
+        }
+        aSig0 |= 0x00100000;
+        aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
+        absZ = aSig0>>( - shiftCount );
+    }
+    z = aSign ? - absZ : absZ;
+    if ( ( aSign ^ ( z < 0 ) ) && z ) {  /* nonzero result whose sign disagrees with aSign */
+ invalid:
+        float_raise( float_flag_invalid );
+        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( aSigExtra ) set_float_exception_inexact_flag();
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the single-precision floating-point format. The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+
+A NaN is converted through float64ToCommonNaN/commonNaNToFloat32; an
+infinity keeps its sign. All other values go through roundAndPackFloat32.
+-------------------------------------------------------------------------------
+*/
+float32 float64_to_float32( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig0, aSig1, zSig;
+    bits32 allZero;  /* receives the high word of the shifted value; not read afterwards */
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig0 | aSig1 ) {
+            return commonNaNToFloat32( float64ToCommonNaN( a ) );
+        }
+        return packFloat32( aSign, 0xFF, 0 );
+    }
+    shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
+    if ( aExp ) zSig |= 0x40000000;  /* nonzero exponent: set bit 30 of the shifted significand */
+    return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC
+/*
+-------------------------------------------------------------------------------
+Rounds the double-precision floating-point value `a' to an integer,
+and returns the result as a double-precision floating-point value. The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+
+This routine reads and writes the `high' and `low' members of float64
+directly. The inexact flag is set whenever the returned value differs from
+`a'.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float64 z;
+
+    aExp = extractFloat64Exp( a );
+    if ( 0x413 <= aExp ) {
+        if ( 0x433 <= aExp ) {  /* returned unchanged unless it is a NaN */
+            if (    ( aExp == 0x7FF )
+                 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
+                return propagateFloat64NaN( a, a );
+            }
+            return a;
+        }
+        lastBitMask = 1;
+        lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;  /* becomes 0 when aExp == 0x413 */
+        roundBitsMask = lastBitMask - 1;
+        z = a;
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            if ( lastBitMask ) {
+                add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+            }
+            else {  /* lastBitMask overflowed to 0: rounding carries into z.high */
+                if ( (sbits32) z.low < 0 ) {
+                    ++z.high;
+                    if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
+                }
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {
+            if (   extractFloat64Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {
+                add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+            }
+        }
+        z.low &= ~ roundBitsMask;
+    }
+    else {
+        if ( aExp <= 0x3FE ) {
+            if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;  /* zero of either sign */
+            set_float_exception_inexact_flag();
+            aSign = extractFloat64Sign( a );
+            switch ( float_rounding_mode ) {  /* no default: other modes reach the signed-zero return below */
+             case float_round_nearest_even:
+                if (    ( aExp == 0x3FE )
+                     && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
+                   ) {
+                    return packFloat64( aSign, 0x3FF, 0, 0 );
+                }
+                break;
+             case float_round_down:
+                return
+                      aSign ? packFloat64( 1, 0x3FF, 0, 0 )
+                    : packFloat64( 0, 0, 0, 0 );
+             case float_round_up:
+                return
+                      aSign ? packFloat64( 1, 0, 0, 0 )
+                    : packFloat64( 0, 0x3FF, 0, 0 );
+            }
+            return packFloat64( aSign, 0, 0, 0 );
+        }
+        lastBitMask = 1;
+        lastBitMask <<= 0x413 - aExp;
+        roundBitsMask = lastBitMask - 1;
+        z.low = 0;
+        z.high = a.high;
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            z.high += lastBitMask>>1;
+            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+                z.high &= ~ lastBitMask;
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {
+            if (   extractFloat64Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {
+                z.high |= ( a.low != 0 );  /* fold any nonzero low word into z.high before adding the mask */
+                z.high += roundBitsMask;
+            }
+        }
+        z.high &= ~ roundBitsMask;
+    }
+    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+        set_float_exception_inexact_flag();
+    }
+    return z;
+
+}
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the double-precision
+floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+before being returned. `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+
+The operand with the smaller exponent is shifted right by the exponent
+difference (bits shifted out are collected in zSig2) before the 64-bit
+significands are added.
+-------------------------------------------------------------------------------
+*/
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int16 expDiff;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {  /* a has the larger exponent */
+        if ( aExp == 0x7FF ) {
+            if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig0 |= 0x00100000;
+        }
+        shift64ExtraRightJamming(
+            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {  /* b has the larger exponent */
+        if ( bExp == 0x7FF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+            return packFloat64( zSign, 0x7FF, 0, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig0 |= 0x00100000;
+        }
+        shift64ExtraRightJamming(
+            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+        zExp = bExp;
+    }
+    else {  /* equal exponents */
+        if ( aExp == 0x7FF ) {
+            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+                return propagateFloat64NaN( a, b );
+            }
+            return a;
+        }
+        add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+        if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );  /* both exponent fields zero: pack the raw sum */
+        zSig2 = 0;
+        zSig0 |= 0x00200000;
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    aSig0 |= 0x00100000;
+    add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    --zExp;
+    if ( zSig0 < 0x00200000 ) goto roundAndPack;  /* sum did not reach bit 21: no shift needed */
+    ++zExp;
+ shiftRight1:
+    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the double-
+precision floating-point values `a' and `b'. 
If `zSign' is 1, the
+difference is negated before being returned. `zSign' is ignored if the
+result is a NaN. The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+
+Subtracting two infinities raises the invalid exception and returns
+float64_default_nan. An exactly zero difference is returned as a zero whose
+sign bit is set only when the rounding mode is float_round_down.
+-------------------------------------------------------------------------------
+*/
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    int16 expDiff;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );  /* work with both significands shifted left by 10 */
+    shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FF ) {
+        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+            return propagateFloat64NaN( a, b );
+        }
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig0 < aSig0 ) goto aBigger;  /* equal exponents: order by significand, high word first */
+    if ( aSig0 < bSig0 ) goto bBigger;
+    if ( bSig1 < aSig1 ) goto aBigger;
+    if ( aSig1 < bSig1 ) goto bBigger;
+    return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        aSig0 |= 0x40000000;
+    }
+    shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+    bSig0 |= 0x40000000;
+ bBigger:
+    sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    zSign ^= 1;  /* b has the larger magnitude: flip the result sign */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig0 |= 0x40000000;
+    }
+    shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+    aSig0 |= 0x40000000;
+ aBigger:
+    sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the double-precision floating-point values `a'
+and `b'. The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+
+Operands with equal signs have their magnitudes added; otherwise the
+magnitudes are subtracted. The sign of `a' is passed on as zSign either way.
+-------------------------------------------------------------------------------
+*/
+float64 float64_add( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        return addFloat64Sigs( a, b, aSign );
+    }
+    else {
+        return subFloat64Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the double-precision floating-point values
+`a' and `b'. The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+
+Operands with equal signs have their magnitudes subtracted; otherwise the
+magnitudes are added. The sign of `a' is passed on as zSign either way.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sub( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        return subFloat64Sigs( a, b, aSign );
+    }
+    else {
+        return addFloat64Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the double-precision floating-point values
+`a' and `b'. The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+
+Infinity times zero raises the invalid exception and returns
+float64_default_nan. Any other product with an infinite operand is an
+infinity, and any product with a zero operand is a zero; both carry the
+exclusive-or of the operand signs.
+-------------------------------------------------------------------------------
+*/
+float64 float64_mul( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;  /* infinity times zero */
+        return packFloat64( zSign, 0x7FF, 0, 0 );
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {  /* zero times infinity */
+ invalid:
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
+        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    zExp = aExp + bExp - 0x400;
+    aSig0 |= 0x00100000;
+    shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
+    mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+    add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zSig2 |= ( zSig3 != 0 );  /* keep any nonzero lowest word as a sticky bit */
+    if ( 0x00200000 <= zSig0 ) {
+        shift64ExtraRightJamming(
+            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+        ++zExp;
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision 
floating-point value `a'
+by the corresponding value `b'. The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+Infinity divided by infinity and zero divided by zero raise the invalid
+exception and return float64_default_nan. A finite nonzero value divided by
+zero raises the divide-by-zero exception and returns a signed infinity. A
+finite value divided by infinity returns a signed zero.
+-------------------------------------------------------------------------------
+*/
+float64 float64_div( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
+        if ( bExp == 0x7FF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+            goto invalid;
+        }
+        return packFloat64( zSign, 0x7FF, 0, 0 );
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign, 0, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) {
+            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+                float_raise( float_flag_invalid );
+                return float64_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat64( zSign, 0x7FF, 0, 0 );
+        }
+        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = aExp - bExp + 0x3FD;
+    shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
+    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
+    if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {  /* bSig <= aSig: halve aSig and bump the exponent */
+        shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
+    mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+    sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+    while ( (sbits32) rem0 < 0 ) {  /* step the estimate down while the remainder is negative */
+        --zSig0;
+        add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+    }
+    zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
+    if ( ( zSig1 & 0x3FF ) <= 4 ) {  /* only then is the second remainder computed exactly */
+        mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+        sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits32) rem1 < 0 ) {
+            --zSig1;
+            add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  /* set the low bit if a nonzero remainder is left */
+    }
+    shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'. The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+An infinite `a' or a zero `b' raises the invalid exception and returns
+float64_default_nan. `a' is returned unchanged when `b' is infinite, when
+`a' is zero, or when the exponent of `a' is more than one below that of `b'.
+-------------------------------------------------------------------------------
+*/
+float64 float64_rem( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+    bits32 allZero, alternateASig0, alternateASig1, sigMean1;
+    sbits32 sigMean0;  /* NOTE(review): other add64 calls in this file pass bits32 outputs -- confirm the pointer types agree */
+    float64 z;  /* NOTE(review): declared but never used in this function */
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );  /* assigned but not read afterwards */
+    if ( aExp == 0x7FF ) {
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        goto invalid;
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return a;
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    expDiff = aExp - bExp;
+    if ( expDiff < -1 ) return a;
+    shortShift64Left(
+        aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
+    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
+    q = le64( bSig0, bSig1, aSig0, aSig1 );
+    if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    expDiff -= 32;
+    while ( 0 < expDiff ) {  /* each pass lowers expDiff by 29 */
+        q = estimateDiv64To32( aSig0, aSig1, bSig0 );
+        q = ( 4 < q ) ? q - 4 : 0;  /* lower the estimate by 4, clamped at 0 */
+        mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
+        shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
+        shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
+        sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+        expDiff -= 29;
+    }
+    if ( -32 < expDiff ) {
+        q = estimateDiv64To32( aSig0, aSig1, bSig0 );
+        q = ( 4 < q ) ? q - 4 : 0;
+        q >>= - expDiff;
+        shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
+        expDiff += 24;
+        if ( expDiff < 0 ) {
+            shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+        }
+        else {
+            shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+        }
+        mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
+        sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+    }
+    else {
+        shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
+        shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
+    }
+    do {  /* subtract bSig until aSig goes negative, keeping the last non-negative value */
+        alternateASig0 = aSig0;
+        alternateASig1 = aSig1;
+        ++q;
+        sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    } while ( 0 <= (sbits32) aSig0 );
+    add64(
+        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
+    if (    ( sigMean0 < 0 )
+         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {  /* a zero sum with odd q also selects the alternate */
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+    }
+    zSign = ( (sbits32) aSig0 < 0 );
+    if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );  /* negate to obtain the magnitude */
+    return
+        normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
+
+}
+#endif
+
+#ifndef SOFTFLOAT_FOR_GCC
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the double-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+
+Special cases handled before the general computation: a NaN is propagated;
+positive infinity and negative zero are returned unchanged; a positive zero
+returns a positive zero; any other negative input (including negative
+infinity) raises the invalid exception and returns float64_default_nan.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sqrt( float64 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    float64 z;  /* NOTE(review): declared but never used in this function */
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
+        if ( ! aSign ) return a;  /* positive infinity */
+        goto invalid;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;  /* negative zero */
+ invalid:
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;  /* half of (aExp - 0x3FF), offset by 0x3FE */
+    aSig0 |= 0x00100000;
+    shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
+    zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
+    if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
+    doubleZSig0 = zSig0 + zSig0;
+    shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
+    mul32To64( zSig0, zSig0, &term0, &term1 );
+    sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits32) rem0 < 0 ) {  /* step zSig0 down while the remainder is negative */
+        --zSig0;
+        doubleZSig0 -= 2;
+        add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
+    if ( ( zSig1 & 0x1FF ) <= 5 ) {  /* only then is the exact remainder computed */
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul32To64( doubleZSig0, zSig1, &term1, &term2 );
+        sub64( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul32To64( zSig1, zSig1, &term2, &term3 );
+        sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits32) rem1 < 0 ) {
+            --zSig1;
+            shortShift64Left( 0, zSig1, 1, &term2, &term3 );
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  /* set the low bit if a nonzero remainder is left */
+    }
+    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
+
+}
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise. The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+If either operand is a NaN the result is 0, and the invalid exception is
+raised only when one of the operands is a signaling NaN. The final test
+ignores the sign bit, so zeros of opposite sign compare equal.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq( float64 a, float64 b )
+{
+
+    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
+              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
+         || (    ( extractFloat64Exp( b ) == 0x7FF )
+              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    return ( a == b ) ||
+        ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise. The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+
+Any NaN operand (quiet or signaling) raises the invalid exception and the
+result is 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_le( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
+              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
+         || (    ( extractFloat64Exp( b ) == 0x7FF )
+              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign )  /* signs differ: true if a is negative or both are zero */
+        return aSign ||
+            ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
+              0 );
+    return ( a == b ) ||  /* same sign: demangled encodings ordered, reversed when negative */
+        ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise. The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+Any NaN operand (quiet or signaling) raises the invalid exception and the
+result is 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
+              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
+         || (    ( extractFloat64Exp( b ) == 0x7FF )
+              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign )  /* signs differ: a negative and not both zero */
+        return aSign &&
+            ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
+              0 );
+    return ( a != b ) &&  /* same sign: unequal and ordered, reversed when negative */
+        ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise. 
The invalid exception is
+raised if either operand is a NaN. Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+NOTE(review): unlike float64_eq, this routine applies `==', `|' and `<<'
+to the float64 operands directly rather than through FLOAT64_DEMANGLE;
+confirm this matches how float64 is defined when SOFTFLOAT_FOR_GCC is not
+set.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq_signaling( float64 a, float64 b )
+{
+
+    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
+              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
+         || (    ( extractFloat64Exp( b ) == 0x7FF )
+              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid );  /* any NaN, quiet or signaling */
+        return 0;
+    }
+    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );  /* second test: both operands are zeros */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+cause an exception. Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+If either operand is a NaN the result is 0; the invalid exception is raised
+only when one of the operands is a signaling NaN.
+
+NOTE(review): the comparisons below use `==', `|', `<<' and `<' on the
+float64 operands directly rather than through FLOAT64_DEMANGLE as
+float64_le does; confirm against the float64 definition.
+-------------------------------------------------------------------------------
+*/
+flag float64_le_quiet( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
+              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
+         || (    ( extractFloat64Exp( b ) == 0x7FF )
+              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );  /* signs differ: true if a is negative or both are zero */
+    return ( a == b ) || ( aSign ^ ( a < b ) );  /* same sign: raw encodings ordered, reversed when negative */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+exception. 
Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#endif diff --git a/StdLib/LibC/Softfloat/bits64/softfloat-macros b/StdLib/LibC/Softfloat/bits64/softfloat-macros new file mode 100644 index 0000000000..12ab441f01 --- /dev/null +++ b/StdLib/LibC/Softfloat/bits64/softfloat-macros @@ -0,0 +1,745 @@ +/* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. 
The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 64, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. +------------------------------------------------------------------------------- +*/ +INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +{ + bits64 z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 64 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 +_plus_ the number of bits given in `count'. The shifted result is at most +64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The +bits shifted off form a second 64-bit result as follows: The _last_ bit +shifted off is the most-significant bit of the extra result, and the other +63 bits of the extra result are all zero if and only if _all_but_the_last_ +bits shifted off were all zero. This extra result is stored in the location +pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. + (This routine makes more sense if `a0' and `a1' are considered to form a +fixed-point value with binary point between `a0' and `a1'. This fixed-point +value is shifted right by the number of bits given in `count', and the +integer part of the result is returned at the location pointed to by +`z0Ptr'. The fractional part of the result may be slightly corrupted as +described above, and is returned at the location pointed to by `z1Ptr'.) 
+------------------------------------------------------------------------------- +*/ +INLINE void + shift64ExtraRightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' can be arbitrarily large; in particular, if `count' is greater +than 128, the result will be 0. The result is broken into two 64-bit pieces +which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift128Right( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<>count ); + z0 = a0>>count; + } + else { + z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. If any nonzero bits are shifted off, they +are ``jammed'' into the least significant bit of the result by setting the +least significant bit to 1. The value of `count' can be arbitrarily large; +in particular, if `count' is greater than 128, the result will be either +0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or +nonzero. 
The result is broken into two 64-bit pieces which are stored at +the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift128RightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<>count ) | ( ( a1<>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 128 ) { + z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<>count ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 128 ) { + z2 = a0<>( count & 63 ); + } + else { + z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' must be less than 64. The result is broken into two 64-bit +pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shortShift128Left( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1<>( ( - count ) & 63 ) ); + +} + +/* +------------------------------------------------------------------------------- +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left +by the number of bits given in `count'. Any bits shifted off are lost. +The value of `count' must be less than 64. The result is broken into three +64-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + shortShift192Left( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount; + + z2 = a2<>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit +value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so +any carry out is lost. The result is broken into two 64-bit pieces which +are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + add128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/* +------------------------------------------------------------------------------- +Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the +192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +modulo 2^192, so any carry out is lost. The result is broken into three +64-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + add192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < (bits64)carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the +128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +2^128, so any borrow out (carry out) is lost. The result is broken into two +64-bit pieces which are stored at the locations pointed to by `z0Ptr' and +`z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + sub128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' +from the 192-bit value formed by concatenating `a0', `a1', and `a2'. +Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The +result is broken into three 64-bit pieces which are stored at the locations +pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + sub192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < (bits64)borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies `a' by `b' to obtain a 128-bit product. The product is broken +into two 64-bit pieces which are stored at the locations pointed to by +`z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits32 aHigh, aLow, bHigh, bLow; + bits64 z0, zMiddleA, zMiddleB, z1; + + aLow = (bits32)a; + aHigh = (bits32)(a>>32); + bLow = (bits32)b; + bHigh = (bits32)(b>>32); + z1 = ( (bits64) aLow ) * bLow; + zMiddleA = ( (bits64) aLow ) * bHigh; + zMiddleB = ( (bits64) aHigh ) * bLow; + z0 = ( (bits64) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + zMiddleA <<= 32; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 128-bit value formed by concatenating `a0' and `a1' by +`b' to obtain a 192-bit product. The product is broken into three 64-bit +pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +`z2Ptr'. 
+------------------------------------------------------------------------------- +*/ +INLINE void + mul128By64To192( + bits64 a0, + bits64 a1, + bits64 b, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2, more1; + + mul64To128( a1, b, &z1, &z2 ); + mul64To128( a0, b, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the +128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit +product. The product is broken into four 64-bit pieces which are stored at +the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + mul128To256( + bits64 a0, + bits64 a1, + bits64 b0, + bits64 b1, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr, + bits64 *z3Ptr + ) +{ + bits64 z0, z1, z2, z3; + bits64 more1, more2; + + mul64To128( a1, b1, &z2, &z3 ); + mul64To128( a1, b0, &z1, &more2 ); + add128( z1, more2, 0, z2, &z1, &z2 ); + mul64To128( a0, b0, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + mul64To128( a0, b1, &more1, &more2 ); + add128( more1, more2, 0, z2, &more1, &z2 ); + add128( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the 64-bit integer quotient obtained by dividing +`b' into the 128-bit value formed by concatenating `a0' and `a1'. The +divisor `b' must be at least 2^63. If q is the exact quotient truncated +toward zero, the approximation returned lies between q and q + 2 inclusive. +If the exact quotient q is larger than 64 bits, the maximum positive 64-bit +unsigned integer is returned. 
+------------------------------------------------------------------------------- +*/ +static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +{ + bits64 b0, b1; + bits64 rem0, rem1, term0, term1; + bits64 z; + + if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); + b0 = b>>32; + z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; + mul64To128( b, z, &term0, &term1 ); + sub128( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits64) rem0 ) < 0 ) { + z -= LIT64( 0x100000000 ); + b1 = b<<32; + add128( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<32 ) | ( rem1>>32 ); + z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; + return z; + +} + +#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) +/* +------------------------------------------------------------------------------- +Returns an approximation to the square root of the 32-bit significand given +by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +`aExp' (the least significant bit) is 1, the integer returned approximates +2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +case, the approximation returned lies strictly within +/-2 of the exact +value. 
+------------------------------------------------------------------------------- +*/ +static bits32 estimateSqrt32( int16 aExp, bits32 a ) +{ + static const bits16 sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const bits16 sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8 idx; + bits32 z; + + idx = ( a>>27 ) & 15; + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 ); + } + return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit of +`a'. If `a' is zero, 32 is returned. 
+------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit of +`a'. If `a' is zero, 64 is returned. +------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros64( bits64 a ) +{ + int8 shiftCount; + + shiftCount = 0; + if ( a < ( (bits64) 1 )<<32 ) { + shiftCount += 32; + } + else { + a >>= 32; + } + shiftCount += (int8)countLeadingZeros32( (bits32)a ); + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' +is equal to the 128-bit value formed by concatenating `b0' and `b1'. 
+Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is +not equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/StdLib/LibC/Softfloat/bits64/softfloat.c b/StdLib/LibC/Softfloat/bits64/softfloat.c new file mode 100644 index 0000000000..e264dd1922 --- /dev/null +++ b/StdLib/LibC/Softfloat/bits64/softfloat.c @@ -0,0 +1,5602 @@ +/* $NetBSD: softfloat.c,v 1.13 2013/11/22 17:04:24 martin Exp $ */ + +/* + * This version hacked for use with gcc -msoft-float by bjh21. 
+ * (Mostly a case of #ifdefing out things GCC doesn't need or provides + * itself). + */ + +/* + * Things you may want to define: + * + * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with + * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them + * properly renamed. + */ + +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. 
+ +=============================================================================== +*/ + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: softfloat.c,v 1.13 2013/11/22 17:04:24 martin Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif + +#include "milieu.h" +#include "softfloat.h" + +/* + * Conversions between floats as stored in memory and floats as + * SoftFloat uses them + */ +#ifndef FLOAT64_DEMANGLE +#define FLOAT64_DEMANGLE(a) (a) +#endif +#ifndef FLOAT64_MANGLE +#define FLOAT64_MANGLE(a) (a) +#endif + +/* +------------------------------------------------------------------------------- +Floating-point rounding mode, extended double-precision rounding precision, +and exception flags. +------------------------------------------------------------------------------- +*/ +#ifndef set_float_rounding_mode +fp_rnd float_rounding_mode = float_round_nearest_even; +fp_except float_exception_flags = 0; +#endif +#ifndef set_float_exception_inexact_flag +#define set_float_exception_inexact_flag() \ + ((void)(float_exception_flags |= float_flag_inexact)) +#endif +#ifdef FLOATX80 +int8 floatx80_rounding_precision = 80; +#endif + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) 
+------------------------------------------------------------------------------- +*/ +#include "softfloat-macros" + +/* +------------------------------------------------------------------------------- +Functions and definitions to determine: (1) whether tininess for underflow +is detected before or after rounding by default, (2) what (if anything) +happens when exceptions are raised, (3) how signaling NaNs are distinguished +from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +are propagated from function inputs to output. These details are target- +specific. +------------------------------------------------------------------------------- +*/ +#include "softfloat-specialize" + +#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) +/* +------------------------------------------------------------------------------- +Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +and 7, and returns the properly rounded 32-bit integer corresponding to the +input. If `zSign' is 1, the input is negated before being converted to an +integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input +is simply rounded to an integer, with the inexact exception raised if the +input cannot be represented exactly as an integer. However, if the fixed- +point input is too large, the invalid exception is raised and the largest +positive or negative integer is returned. +------------------------------------------------------------------------------- +*/ +static int32 roundAndPackInt32( flag zSign, bits64 absZ ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + int32 z; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! 
roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = (int8)(absZ & 0x7F); + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = (int32)absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_raise( float_flag_invalid ); + return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( roundBits ) set_float_exception_inexact_flag(); + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +`absZ1', with binary point between bits 63 and 64 (between the input words), +and returns the properly rounded 64-bit integer corresponding to the input. +If `zSign' is 1, the input is negated before being converted to an integer. +Ordinarily, the fixed-point input is simply rounded to an integer, with +the inexact exception raised if the input cannot be represented exactly as +an integer. However, if the fixed-point input is too large, the invalid +exception is raised and the largest positive or negative integer is +returned. +------------------------------------------------------------------------------- +*/ +static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) +{ + int8 roundingMode; + flag roundNearestEven, increment; + int64 z; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + increment = ( (sbits64) absZ1 < 0 ); + if ( ! 
roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && absZ1; + } + else { + increment = ( roundingMode == float_round_up ) && absZ1; + } + } + } + if ( increment ) { + ++absZ0; + if ( absZ0 == 0 ) goto overflow; + absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + } + z = absZ0; + if ( zSign ) z = - z; + if ( z && ( ( z < 0 ) ^ zSign ) ) { + overflow: + float_raise( float_flag_invalid ); + return + zSign ? (sbits64) LIT64( 0x8000000000000000 ) + : LIT64( 0x7FFFFFFFFFFFFFFF ); + } + if ( absZ1 ) set_float_exception_inexact_flag(); + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return a & 0x007FFFFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( a>>23 ) & 0xFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat32Sign( float32 a ) +{ + + return a>>31; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal single-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. 
+------------------------------------------------------------------------------- +*/ +static void + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( aSig ) - 8; + *zSigPtr = aSig<>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat32' except that `zSig' does not have to be normalized. +Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +floating-point exponent. +------------------------------------------------------------------------------- +*/ +static float32 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( zSig ) - 1; + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<> 52) & 0x7FF); + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat64Sign( float64 a ) +{ + + return (flag)(FLOAT64_DEMANGLE(a) >> 63); + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal double-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. 
+------------------------------------------------------------------------------- +*/ +static void + normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ) - 11; + *zSigPtr = aSig<>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat64' except that `zSig' does not have to be normalized. +Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +floating-point exponent. +------------------------------------------------------------------------------- +*/ +static float64 + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( zSig ) - 1; + return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<>15; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal extended double-precision floating-point value +represented by the denormalized significand `aSig'. The normalized exponent +and significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. 
+------------------------------------------------------------------------------- +*/ +static void + normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ); + *zSigPtr = aSig<> 48) & 0x7FFF); + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the quadruple-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat128Sign( float128 a ) +{ + + return (flag)(a.high >> 63); + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal quadruple-precision floating-point value +represented by the denormalized significand formed by the concatenation of +`aSig0' and `aSig1'. The normalized exponent is stored at the location +pointed to by `zExpPtr'. The most significant 49 bits of the normalized +significand are stored at the location pointed to by `zSig0Ptr', and the +least significant 64 bits of the normalized significand are stored at the +location pointed to by `zSig1Ptr'. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat128Subnormal( + bits64 aSig0, + bits64 aSig1, + int32 *zExpPtr, + bits64 *zSig0Ptr, + bits64 *zSig1Ptr + ) +{ + int8 shiftCount; + + if ( aSig0 == 0 ) { + shiftCount = countLeadingZeros64( aSig1 ) - 15; + if ( shiftCount < 0 ) { + *zSig0Ptr = aSig1>>( - shiftCount ); + *zSig1Ptr = aSig1<<( shiftCount & 63 ); + } + else { + *zSig0Ptr = aSig1<> 1 ); + return normalizeRoundAndPackFloat32( 0, 0x9C, a ); +} + + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' +to the double-precision floating-point format. 
The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 int32_to_float64( int32 a ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return 0; + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 21; + zSig = absA; + return packFloat64( zSign, 0x432 - shiftCount, zSig<>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + set_float_exception_inexact_flag(); + } + if ( aSign ) z = - z; + return z; + +} + +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 64-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int64 float32_to_int64( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 aSig64, aSigExtra; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = 0xBE - aExp; + if ( shiftCount < 0 ) { + float_raise( float_flag_invalid ); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + if ( aExp ) aSig |= 0x00800000; + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); + return roundAndPackInt64( aSign, aSig64, aSigExtra ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 64-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int64 float32_to_int64_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 aSig64; + int64 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0xBE; + if ( 0 <= shiftCount ) { + if ( a != 0xDF000000 ) { + float_raise( float_flag_invalid ); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) set_float_exception_inexact_flag(); + return 0; + } + aSig64 = aSig | 0x00800000; + aSig64 <<= 40; + z = aSig64>>( - shiftCount ); + if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { + set_float_exception_inexact_flag(); + } + if ( aSign ) z = - z; + return z; + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float32_to_float64( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); + return packFloat64( aSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+floatx80 float32_to_floatx80( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {    /* exponent field all ones: NaN (nonzero fraction) or infinity */
+        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {    /* zero or subnormal input */
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    aSig |= 0x00800000;    /* set bit 23, the leading significand bit */
+    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );    /* 0x3F80 == 0x3FFF - 0x7F; <<40 moves bit 23 up to bit 63 */
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the quadruple-precision floating-point format. The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+
+An input with exponent field 0xFF and a nonzero fraction is routed through
+float32ToCommonNaN and commonNaNToFloat128; with a zero fraction it is packed
+with exponent field 0x7FFF and a zero significand. A zero input is packed
+with its sign and all other fields zero. A subnormal input is normalized
+before being packed.
+-------------------------------------------------------------------------------
+*/
+float128 float32_to_float128( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {    /* NaN or infinity */
+        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
+        return packFloat128( aSign, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {    /* zero or subnormal input */
+        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;    /* NOTE(review): presumably offsets the leading one left in aSig by the normalization, assuming packFloat128 adds its fields together -- confirm */
+    }
+    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );    /* <<25 moves fraction bits 22..0 up to bits 47..25 of the high word */
+
+}
+
+#endif
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer, and
+returns the result as a single-precision floating-point value.
The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float32 z;
+
+    aExp = extractFloat32Exp( a );
+    if ( 0x96 <= aExp ) {    /* 0x96 == 0x7F + 23: every fraction bit already has integer weight */
+        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+            return propagateFloat32NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x7E ) {    /* exponent field below 0x7F: magnitude is less than 1 */
+        if ( (bits32) ( a<<1 ) == 0 ) return a;    /* all bits except the sign are zero: returned unchanged */
+        set_float_exception_inexact_flag();
+        aSign = extractFloat32Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {    /* magnitude strictly above one half */
+                return packFloat32( aSign, 0x7F, 0 );
+            }
+            break;
+         case float_round_to_zero:
+            break;
+         case float_round_down:
+            return aSign ? 0xBF800000 : 0;    /* encodings of -1.0 and +0 */
+         case float_round_up:
+            return aSign ? 0x80000000 : 0x3F800000;    /* encodings of -0 and +1.0 */
+        }
+        return packFloat32( aSign, 0, 0 );    /* remaining cases: a zero carrying the sign of a */
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x96 - aExp;    /* the bit of the encoding whose weight is 1 */
+    roundBitsMask = lastBitMask - 1;    /* every encoding bit below that one */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;    /* add one half directly to the encoding; a carry may ripple upward */
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;    /* exact tie: clear the units bit so the result is even */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {    /* negative while rounding down, or positive while rounding up */
+            z += roundBitsMask;    /* push the magnitude up to the next integer unless already exact */
+        }
+    }
+    z &= ~ roundBitsMask;    /* discard the fractional bits */
+    if ( z != a ) set_float_exception_inexact_flag();
+    return z;
+
+}
+#endif /* !SOFTFLOAT_FOR_GCC */
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+before being returned. `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 6;    /* leave 6 low-order bits free below each fraction */
+    bSig <<= 6;
+    if ( 0 < expDiff ) {    /* a has the larger exponent field */
+        if ( aExp == 0xFF ) {    /* a is a NaN (nonzero fraction) or an infinity */
+            if ( aSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;    /* b has a zero exponent field: no leading bit, one place less to align */
+        }
+        else {
+            bSig |= 0x20000000;    /* leading significand bit of b (bit 23 shifted left by 6) */
+        }
+        shift32RightJamming( bSig, expDiff, &bSig );    /* align b to a; helper defined elsewhere, presumably keeps a sticky bit */
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {    /* b has the larger exponent field */
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            return packFloat32( zSign, 0xFF, 0 );    /* b is an infinity: infinity with sign zSign */
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;    /* a has a zero exponent field: no leading bit, one place less to align */
+        }
+        else {
+            aSig |= 0x20000000;
+        }
+        shift32RightJamming( aSig, - expDiff, &aSig );
+        zExp = bExp;
+    }
+    else {    /* equal exponent fields */
+        if ( aExp == 0xFF ) {
+            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );    /* both exponent fields zero: raw fractions are summed and packed without rounding */
+        zSig = 0x40000000 + aSig + bSig;    /* 0x40000000 == 2 * 0x20000000, the two leading bits */
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= 0x20000000;    /* leading bit of the larger operand; only the sum is used, so it is ORed into aSig on both paths */
+    zSig = ( aSig + bSig )<<1;    /* first assume the sum did not carry */
+    --zExp;
+    if ( (sbits32) zSig < 0 ) {    /* bit 31 set: the sum carried, so undo the shift and the decrement */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the single-
+precision floating-point values `a' and `b'. If `zSign' is 1, the
+difference is negated before being returned. `zSign' is ignored if the
+result is a NaN. The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 7;    /* leave 7 low-order bits free below each fraction */
+    bSig <<= 7;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0xFF ) {    /* equal exponent fields, both all ones */
+        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+        float_raise( float_flag_invalid );    /* both operands are infinities: invalid, default NaN */
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {    /* both exponent fields zero: exponent 1 is used for the result */
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );    /* equal magnitudes: zero, with the sign bit set only when rounding down */
+ bExpBigger:
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign ^ 1, 0xFF, 0 );    /* b is an infinity: infinity with the sign flipped */
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;    /* a has a zero exponent field: no leading bit, one place less to align */
+    }
+    else {
+        aSig |= 0x40000000;    /* leading significand bit of a (bit 23 shifted left by 7) */
+    }
+    shift32RightJamming( aSig, - expDiff, &aSig );    /* align a to b; helper defined elsewhere */
+    bSig |= 0x40000000;
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;    /* |b| exceeds |a|: the difference takes the opposite sign */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0xFF ) {    /* a is a NaN (nonzero fraction) or an infinity */
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;    /* b has a zero exponent field: no leading bit, one place less to align */
+    }
+    else {
+        bSig |= 0x40000000;
+    }
+    shift32RightJamming( bSig, expDiff, &bSig );
+    aSig |= 0x40000000;
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;    /* the callee's header comment asks for an exponent one below the true one */
+    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the single-precision floating-point values `a'
+and `b'. The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_add( float32 a, float32 b )
+{
+    const flag signA = extractFloat32Sign( a );
+    const flag signB = extractFloat32Sign( b );
+
+    /* Operands of like sign add in magnitude; operands of unlike sign
+       subtract in magnitude.  The sign of `a' is handed to the helper
+       in both cases. */
+    return ( signA != signB ) ? subFloat32Sigs( a, b, signA )
+                              : addFloat32Sigs( a, b, signA );
+}
+
+/*
+-------------------------------------------------------------------------------
+Computes `a' minus `b' for the single-precision floating-point values `a'
+and `b', according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic. The work is delegated to the magnitude helpers: operands of
+like sign have their magnitudes subtracted, operands of unlike sign have
+their magnitudes added.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sub( float32 a, float32 b )
+{
+    const flag signA = extractFloat32Sign( a );
+    const flag signB = extractFloat32Sign( b );
+
+    /* The sign of `a' is handed to the helper in both cases. */
+    return ( signA != signB ) ? addFloat32Sigs( a, b, signA )
+                              : subFloat32Sigs( a, b, signA );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the single-precision floating-point values
+`a' and `b'. The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_mul( float32 a, float32 b )
+{
+    bits32 fracA = extractFloat32Frac( a );
+    int16 expA = extractFloat32Exp( a );
+    bits32 fracB = extractFloat32Frac( b );
+    int16 expB = extractFloat32Exp( b );
+    flag zSign = extractFloat32Sign( a ) ^ extractFloat32Sign( b );
+    int16 zExp;
+    bits32 zSig;
+    bits64 wideProduct;
+
+    /* At least one operand is a NaN or an infinity. */
+    if ( ( expA == 0xFF ) || ( expB == 0xFF ) ) {
+        if (    ( ( expA == 0xFF ) && fracA )
+             || ( ( expB == 0xFF ) && fracB ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        /* No NaN, so one operand is an infinity; infinity times zero is
+           invalid. */
+        if ( ( ( expA | fracA ) == 0 ) || ( ( expB | fracB ) == 0 ) ) {
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    /* Zero operands give a signed zero; subnormals are normalized. */
+    if ( expA == 0 ) {
+        if ( fracA == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( fracA, &expA, &fracA );
+    }
+    if ( expB == 0 ) {
+        if ( fracB == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( fracB, &expB, &fracB );
+    }
+    zExp = expA + expB - 0x7F;
+    /* Attach the leading bits and pre-shift so the upper half of the 64-bit
+       product holds the result significand. */
+    fracA = ( fracA | 0x00800000 )<<7;
+    fracB = ( fracB | 0x00800000 )<<8;
+    shift64RightJamming( ( (bits64) fracA ) * fracB, 32, &wideProduct );
+    zSig = (bits32)wideProduct;
+    /* Bit 30 clear: shift left one place and lower the exponent to match. */
+    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+        zSig <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the single-precision floating-point value `a'
+by the corresponding value `b'. The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_div( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    zSign = aSign ^ bSign;    /* quotient sign is the XOR of the operand signs */
+    if ( aExp == 0xFF ) {    /* a is a NaN or an infinity */
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            float_raise( float_flag_invalid );    /* infinity divided by infinity */
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( bExp == 0xFF ) {    /* b is a NaN or an infinity, a is finite */
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign, 0, 0 );    /* finite divided by infinity: signed zero */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {    /* divisor is zero */
+            if ( ( aExp | aSig ) == 0 ) {    /* zero divided by zero */
+                float_raise( float_flag_invalid );
+                return float32_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x7D;
+    aSig = ( aSig | 0x00800000 )<<7;    /* leading bit attached, then placed at bit 30 */
+    bSig = ( bSig | 0x00800000 )<<8;    /* leading bit attached, then placed at bit 31 */
+    if ( bSig <= ( aSig + aSig ) ) {    /* halve the dividend so the quotient's leading bit lands at bit 30 */
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = (bits32)((((bits64) aSig) << 32) / bSig);
+    if ( ( zSig & 0x3F ) == 0 ) {    /* low 6 bits are zero: record whether the division left a remainder */
+        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the single-precision floating-point value `a'
+with respect to the corresponding value `b'. The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_rem( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits32 aSig, bSig;
+    bits32 q;
+    bits64 aSig64, bSig64, q64;
+    bits32 alternateASig;
+    sbits32 sigMean;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    if ( aExp == 0xFF ) {    /* a is a NaN or an infinity */
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        float_raise( float_flag_invalid );    /* remainder of an infinity is invalid */
+        return float32_default_nan;
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return a;    /* finite a with infinite b: a is returned unchanged */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {    /* remainder with respect to zero is invalid */
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig |= 0x00800000;    /* attach the leading significand bits */
+    bSig |= 0x00800000;
+    if ( expDiff < 32 ) {    /* quotient fits in 32 bits: one 64-by-32 division suffices */
+        aSig <<= 8;
+        bSig <<= 8;
+        if ( expDiff < 0 ) {
+            if ( expDiff < -1 ) return a;    /* |a| is below half of |b|: a is already the remainder */
+            aSig >>= 1;
+        }
+        q = ( bSig <= aSig );    /* first quotient bit */
+        if ( q ) aSig -= bSig;
+        if ( 0 < expDiff ) {
+            q = ( ( (bits64) aSig )<<32 ) / bSig;
+            q >>= 32 - expDiff;
+            bSig >>= 2;
+            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+        }
+        else {
+            aSig >>= 2;
+            bSig >>= 2;
+        }
+    }
+    else {    /* large exponent gap: reduce in steps of 62 quotient bits */
+        if ( bSig <= aSig ) aSig -= bSig;
+        aSig64 = ( (bits64) aSig )<<40;
+        bSig64 = ( (bits64) bSig )<<40;
+        expDiff -= 64;
+        while ( 0 < expDiff ) {
+            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+            q64 = ( 2 < q64 ) ? q64 - 2 : 0;    /* back the estimate off by 2, clamped at 0; NOTE(review): presumably keeps it from exceeding the true quotient -- confirm against estimateDiv128To64 */
+            aSig64 = - ( ( bSig * q64 )<<38 );
+            expDiff -= 62;
+        }
+        expDiff += 64;
+        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+        q = q64>>( 64 - expDiff );
+        bSig <<= 6;
+        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    do {    /* subtract b until the partial remainder turns negative */
+        alternateASig = aSig;    /* remainder before this subtraction */
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits32) aSig );
+    sigMean = aSig + alternateASig;    /* sign tells which of the two candidates is nearer to zero */
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {    /* earlier candidate is nearer, or a tie with q odd */
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits32) aSig < 0 );
+    if ( zSign ) aSig = - aSig;    /* keep the magnitude; the sign goes into the result sign below */
+    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+
+}
+#endif /* !SOFTFLOAT_FOR_GCC */
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the single-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+
+A NaN operand is propagated and positive infinity is returned unchanged.
+Any other operand with the sign bit set raises the invalid exception and
+yields the default NaN, except that an operand whose exponent and fraction
+fields are both zero is returned unchanged.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sqrt( float32 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig, zSig;
+    bits64 rem, term;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {    /* NaN or infinity */
+        if ( aSig ) return propagateFloat32NaN( a, 0 );
+        if ( ! aSign ) return a;    /* positive infinity */
+        float_raise( float_flag_invalid );
+        return float32_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;    /* negative zero is returned as is */
+        float_raise( float_flag_invalid );
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;    /* positive zero */
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;    /* halve the unbiased exponent, then rebias */
+    aSig = ( aSig | 0x00800000 )<<8;    /* leading bit attached, then placed at bit 31 */
+    zSig = estimateSqrt32( aExp, aSig ) + 2;
+    if ( ( zSig & 0x7F ) <= 5 ) {    /* NOTE(review): presumably the estimate is too close to a rounding boundary here -- confirm against estimateSqrt32 */
+        if ( zSig < 2 ) {    /* the + 2 above wrapped around */
+            zSig = 0x7FFFFFFF;
+            goto roundAndPack;
+        }
+        aSig >>= aExp & 1;
+        term = ( (bits64) zSig ) * zSig;
+        rem = ( ( (bits64) aSig )<<32 ) - term;
+        while ( (sbits64) rem < 0 ) {    /* estimate too large: step it down, updating the remainder */
+            --zSig;
+            rem += ( ( (bits64) zSig )<<1 ) | 1;
+        }
+        zSig |= ( rem != 0 );    /* set bit 0 when the root is inexact */
+    }
+    shift32RightJamming( zSig, 1, &zSig );
+ roundAndPack:
+    return roundAndPackFloat32( 0, zExp, zSig );
+
+}
+#endif /* !SOFTFLOAT_FOR_GCC */
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise. The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+If either operand is a NaN the result is 0, and the invalid exception is
+raised only when float32_is_signaling_nan reports one of them as signaling.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq( float32 a, float32 b )
+{
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );    /* identical encodings, or both operands are zeros of either sign */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise. The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le( float32 a, float32 b )
+{
+    const flag aIsNaN =
+        ( extractFloat32Exp( a ) == 0xFF ) && ( extractFloat32Frac( a ) != 0 );
+    const flag bIsNaN =
+        ( extractFloat32Exp( b ) == 0xFF ) && ( extractFloat32Frac( b ) != 0 );
+    flag aNegative, bNegative;
+
+    /* Any NaN operand makes the comparison false and raises invalid. */
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative == bNegative ) {
+        /* Same sign: encodings order like magnitudes, so the unsigned
+           comparison is inverted for negative operands. */
+        return ( a == b ) || ( aNegative ^ ( a < b ) );
+    }
+    /* Opposite signs: true when `a' is the negative one, or when both
+       operands are zeros. */
+    return aNegative || ( (bits32) ( ( a | b )<<1 ) == 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Reports whether the single-precision floating-point value `a' is strictly
+less than `b': 1 if so, 0 otherwise. If either operand is a NaN the invalid
+exception is raised and 0 is returned. The comparison follows the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt( float32 a, float32 b )
+{
+    const flag aIsNaN =
+        ( extractFloat32Exp( a ) == 0xFF ) && ( extractFloat32Frac( a ) != 0 );
+    const flag bIsNaN =
+        ( extractFloat32Exp( b ) == 0xFF ) && ( extractFloat32Frac( b ) != 0 );
+    flag aNegative, bNegative;
+
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative == bNegative ) {
+        /* Same sign: distinct encodings, ordered by magnitude and inverted
+           for negative operands. */
+        return ( a != b ) && ( aNegative ^ ( a < b ) );
+    }
+    /* Opposite signs: true when `a' is the negative one, unless both
+       operands are zeros. */
+    return aNegative && ( (bits32) ( ( a | b )<<1 ) != 0 );
+}
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise. The invalid exception is
+raised if either operand is a NaN. Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq_signaling( float32 a, float32 b )
+{
+    const flag aIsNaN =
+        ( extractFloat32Exp( a ) == 0xFF ) && ( extractFloat32Frac( a ) != 0 );
+    const flag bIsNaN =
+        ( extractFloat32Exp( b ) == 0xFF ) && ( extractFloat32Frac( b ) != 0 );
+
+    /* Any NaN operand, quiet or signaling, raises invalid here. */
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* Differing encodings are still equal when both are zeros. */
+    return (bits32) ( ( a | b )<<1 ) == 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Reports whether the single-precision floating-point value `a' is less than
+or equal to `b': 1 if so, 0 otherwise. A NaN operand makes the result 0;
+the invalid exception is raised only if one of the operands is a signaling
+NaN. The comparison follows the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le_quiet( float32 a, float32 b )
+{
+    const flag aIsNaN =
+        ( extractFloat32Exp( a ) == 0xFF ) && ( extractFloat32Frac( a ) != 0 );
+    const flag bIsNaN =
+        ( extractFloat32Exp( b ) == 0xFF ) && ( extractFloat32Frac( b ) != 0 );
+    flag aNegative, bNegative;
+
+    if ( aIsNaN || bIsNaN ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative == bNegative ) {
+        /* Same sign: unsigned encoding order, inverted for negatives. */
+        return ( a == b ) || ( aNegative ^ ( a < b ) );
+    }
+    /* Opposite signs: true when `a' is the negative one, or when both
+       operands are zeros. */
+    return aNegative || ( (bits32) ( ( a | b )<<1 ) == 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+exception. Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt_quiet( float32 a, float32 b )
+{
+    const flag aIsNaN =
+        ( extractFloat32Exp( a ) == 0xFF ) && ( extractFloat32Frac( a ) != 0 );
+    const flag bIsNaN =
+        ( extractFloat32Exp( b ) == 0xFF ) && ( extractFloat32Frac( b ) != 0 );
+    flag aNegative, bNegative;
+
+    /* A NaN operand gives 0; only a signaling NaN raises invalid. */
+    if ( aIsNaN || bIsNaN ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative == bNegative ) {
+        /* Same sign: distinct encodings, ordered by magnitude and inverted
+           for negative operands. */
+        return ( a != b ) && ( aNegative ^ ( a < b ) );
+    }
+    /* Opposite signs: true when `a' is the negative one, unless both
+       operands are zeros. */
+    return aNegative && ( (bits32) ( ( a | b )<<1 ) != 0 );
+}
+#endif /* !SOFTFLOAT_FOR_GCC */
+
+#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format. The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode. If `a' is a NaN, the largest
+positive integer is returned. Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+------------------------------------------------------------------------------- +*/ +int32 float64_to_int32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. +If `a' is a NaN, the largest positive integer is returned. Otherwise, if +the conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( 0x41E < aExp ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FF ) { + if ( aExp || aSig ) set_float_exception_inexact_flag(); + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = (int32)aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? 
(sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<>( - shiftCount ); + if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { + set_float_exception_inexact_flag(); + } + } + if ( aSign ) z = - z; + return z; + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the single-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float64_to_float32( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + bits32 zSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 22, &aSig ); + zSig = (bits32)aSig; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +floatx80 float64_to_floatx80( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the quadruple-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float64_to_float128( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + +} + +#endif + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Rounds the double-precision floating-point value `a' to an integer, and +returns the result as a double-precision floating-point value. 
The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { + return propagateFloat64NaN( a, a ); + } + return a; + } + if ( aExp < 0x3FF ) { + if ( (bits64) ( a<<1 ) == 0 ) return a; + set_float_exception_inexact_flag(); + aSign = extractFloat64Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { + return packFloat64( aSign, 0x3FF, 0 ); + } + break; + case float_round_to_zero: + break; + case float_round_down: + return aSign ? LIT64( 0xBFF0000000000000 ) : 0; + case float_round_up: + return + aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); + } + return packFloat64( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) set_float_exception_inexact_flag(); + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the double-precision +floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. 
+The addition is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 9; + bSig <<= 9; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= LIT64( 0x2000000000000000 ); + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits64) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the double- +precision floating-point values `a' and `b'. If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 10; + bSig <<= 10; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign ^ 1, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + bSig |= LIT64( 0x4000000000000000 ); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + aSig |= LIT64( 0x4000000000000000 ); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the double-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_add( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign ); + } + else { + return subFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sub( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign ); + } + else { + return addFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_mul( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FF; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits64) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the double-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_div( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + bits64 rem0, rem1; + bits64 term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat64( zSign, 0x7FF, 0 ); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FD; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64( aSig, 0, bSig ); + if ( ( zSig & 0x1FF ) <= 2 ) { + mul64To128( bSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the remainder of the double-precision 
floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_rem( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits64 aSig, bSig; + bits64 q, alternateASig; + sbits64 sigMean; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? 
q - 2 : 0; + q >>= 64 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits64) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits64) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the double-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sqrt( float64 a ) +{ + flag aSign; + int16 aExp, zExp; + bits64 aSig, zSig, doubleZSig; + bits64 rem0, rem1, term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, a ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig |= LIT64( 0x0010000000000000 ); + zSig = estimateSqrt32( aExp, aSig>>21 ); + aSig <<= 9 - ( aExp & 1 ); + zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); + if ( ( zSig & 0x1FF ) <= 5 ) { + doubleZSig = zSig<<1; + mul64To128( zSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + doubleZSig -= 2; + add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + return roundAndPackFloat64( 0, zExp, zSig ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. 
The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) + return aSign || + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == + 0 ); + return ( a == b ) || + ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) + return aSign && + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != + 0 ); + return ( a != b ) && + ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. 
Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq_signaling( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} +#endif + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic---which means in particular that the conversion +is rounded according to the current rounding mode. If `a' is a NaN, the +largest positive integer is returned. Otherwise, if the conversion +overflows, the largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. 
The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic, except that the conversion is always rounded +toward zero. If `a' is a NaN, the largest positive integer is returned. +Otherwise, if the conversion overflows, the largest integer with the same +sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32_round_to_zero( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if ( aExp || aSig ) set_float_exception_inexact_flag(); + return 0; + } + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<>( - shiftCount ); + if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { + set_float_exception_inexact_flag(); + } + if ( aSign ) z = - z; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the single-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 floatx80_to_float32( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 33, &aSig ); + if ( aExp || aSig ) aExp -= 0x3F81; + return roundAndPackFloat32( aSign, aExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the double-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 floatx80_to_float64( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig, zSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shift64RightJamming( aSig, 1, &zSig ); + if ( aExp || aSig ) aExp -= 0x3C01; + return roundAndPackFloat64( aSign, aExp, zSig ); + +} + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the quadruple-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float128 floatx80_to_float128( floatx80 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); + } + shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp, zSig0, zSig1 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the extended double-precision floating-point value `a' to an integer, +and returns the result as an extended quadruple-precision floating-point +value. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + floatx80 z; + + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaN( a, a ); + } + return a; + } + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) + && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + return a; + } + set_float_exception_inexact_flag(); + aSign = extractFloatx80Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_to_zero: + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? 
packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.low += lastBitMask>>1; + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z.low += roundBitsMask; + } + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if ( z.low != a.low ) set_float_exception_inexact_flag(); + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the extended double- +precision floating-point values `a' and `b'. If `zSign' is 1, the sum is +negated before being returned. `zSign' is ignored if the result is a NaN. +The addition is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b ); + } + return a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); + goto roundAndPack; + } + zExp = aExp; + goto shiftRight1; + } + zSig0 = aSig + bSig; + if ( (sbits64) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the extended +double-precision floating-point values `a' and `b'. If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. 
The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    /* From here on both exponent fields are equal. */
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        /* Both have exponent 0x7FFF and no fraction bits below bit 63: */
+        /* the difference is invalid, so return the default NaN. */
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        /* An exponent field of 0 is treated as exponent 1 for the result. */
+        aExp = 1;
+        bExp = 1;
+    }
+    zSig1 = 0;
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    /* Equal magnitudes: the zero result is negative only when rounding down. */
+    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) ++expDiff;
+    /* Align `a' to `b'; bits shifted out are kept (jammed) in zSig1. */
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    /* |b| > |a|, so the difference takes the opposite sign. */
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) --expDiff;
+    /* Align `b' to `a'; bits shifted out are kept (jammed) in zSig1. */
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the extended double-precision
floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_add( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs( a, b, aSign ); + } + else { + return subFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_sub( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs( a, b, aSign ); + } + else { + return addFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    /* The product is negative exactly when the operand signs differ. */
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        /* `a' has the maximum exponent and `b' is zero: invalid. */
+        if ( ( bExp | bSig ) == 0 ) goto invalid;
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+            /* Shared exit: raise invalid and return the default NaN. */
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    /* If the top bit of the 128-bit product is clear (and the high word is */
+    /* nonzero), shift left one place and compensate in the exponent. */
+    if ( 0 < (sbits64) zSig0 ) {
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the extended double-precision floating-point
+value `a' by the corresponding value `b'. The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, term0, term1, term2;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            /* Both operands have the maximum exponent and neither is a NaN. */
+            goto invalid;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            /* Divisor is zero: zero/zero is invalid, otherwise divbyzero. */
+            if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low = floatx80_default_nan_low;
+                z.high = floatx80_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    /* Halve the dividend when it is not smaller than the divisor, and */
+    /* compensate in the exponent. */
+    if ( bSig <= aSig ) {
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );
+        ++zExp;
+    }
+    /* First 64 quotient bits: estimate, then step the estimate down while */
+    /* the remainder is negative. */
+    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    }
+    /* Next quotient word; it is corrected exactly only when the estimate, */
+    /* shifted left by one, is at most 8. */
+    zSig1 = estimateDiv128To64( rem1, 0, bSig );
+    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        }
+        /* Sticky bit: record any nonzero remainder for rounding. */
+        zSig1 |= ( ( rem1 | rem2 ) != 0 );
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the extended double-precision floating-point value
+`a' with respect to the corresponding value `b'. The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    /* bSign is extracted but not used below. */
+    bSign = extractFloatx80Sign( b );
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        goto invalid;
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        /* NOTE(review): this test ignores bit 63 of aSig0, so an operand */
+        /* with exponent 0 and only bit 63 set is returned unchanged; */
+        /* confirm that this is intended. */
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 );
+    zSign = aSign;
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        /* Exponent of `a' more than one below that of `b': return `a'. */
+        if ( expDiff < -1 ) return a;
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff = 0;
+    }
+    q = ( bSig <= aSig0 );
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    /* Reduce the exponent difference 62 bits at a time; each quotient */
+    /* estimate is lowered by 2 (clamped at 0) before it is applied. */
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        /* Final partial step covering the remaining expDiff bits. */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) {
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    /* Choose between the remainder and (divisor - remainder): take the */
+    /* smaller one, and on a tie take the alternate when q is odd. */
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) {
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;
+    }
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the extended double-precision floating-point
+value `a'. The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sqrt( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+        if ( ! aSign ) return a;
+        goto invalid;
+    }
+    if ( aSign ) {
+        /* A negative zero is returned unchanged; any other negative input */
+        /* is invalid. */
+        if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    /* NOTE(review): ( aExp - 0x3FFF ) can be negative here, and right- */
+    /* shifting a negative value is implementation-defined in C. */
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+    doubleZSig0 = zSig0<<1;
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    /* Step the root estimate down while the remainder is negative. */
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+    /* The low word is corrected exactly only when its low 62 bits are */
+    /* at most 5. */
+    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        /* Sticky bit: record any nonzero remainder for rounding. */
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= doubleZSig0;
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+equal to the corresponding value `b', and 0 otherwise. The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+------------------------------------------------------------------------------- +*/ +flag floatx80_eq( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than or equal to the corresponding value `b', and 0 otherwise. The +comparison is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag floatx80_lt( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is equal +to the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq_signaling( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs +do not cause an exception. 
Otherwise, the comparison is performed according +to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause +an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? 
lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the 32-bit two's complement integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float128_to_int32( float128 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + aSig0 |= ( aSig1 != 0 ); + shiftCount = 0x4028 - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); + return roundAndPackInt32( aSign, aSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the 32-bit two's complement integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. 
+------------------------------------------------------------------------------- +*/ +int32 float128_to_int32_round_to_zero( float128 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1, savedASig; + int32 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + aSig0 |= ( aSig1 != 0 ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if ( aExp || aSig0 ) set_float_exception_inexact_flag(); + return 0; + } + aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = 0x402F - aExp; + savedASig = aSig0; + aSig0 >>= shiftCount; + z = (int32)aSig0; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig0<>( ( - shiftCount ) & 63 ) ); + if ( (bits64) ( aSig1<>( - shiftCount ); + if ( aSig1 + || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { + set_float_exception_inexact_flag(); + } + } + if ( aSign ) z = - z; + return z; + +} + +#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \ + && defined(SOFTFLOAT_NEED_FIXUNS) +/* + * just like above - but do not care for overflow of signed results + */ +uint64 float128_to_uint64_round_to_zero( float128 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + uint64 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = aExp - 0x402F; + if ( 0 < shiftCount ) { + if ( 0x403F <= aExp ) { + aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); + if ( ( a.high == LIT64( 0xC03E000000000000 ) ) + && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { + if ( aSig1 ) set_float_exception_inexact_flag(); + } + else { + float_raise( float_flag_invalid ); + } + return 
LIT64( 0xFFFFFFFFFFFFFFFF ); + } + z = ( aSig0<>( ( - shiftCount ) & 63 ) ); + if ( (bits64) ( aSig1<>( - shiftCount ); + if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { + set_float_exception_inexact_flag(); + } + } + if ( aSign ) z = - z; + return z; + +} +#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the single-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float128_to_float32( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + bits32 zSig; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32( float128ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + aSig0 |= ( aSig1 != 0 ); + shift64RightJamming( aSig0, 18, &aSig0 ); + zSig = (bits32)aSig0; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x3F81; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+float64 float128_to_float64( float128 a )
+{
+    flag sign;
+    int32 expo;
+    bits64 sigHigh, sigLow;
+
+    sigLow = extractFloat128Frac1( a );
+    sigHigh = extractFloat128Frac0( a );
+    expo = extractFloat128Exp( a );
+    sign = extractFloat128Sign( a );
+    if ( expo == 0x7FFF ) {
+        /* Maximum exponent: a nonzero fraction is a NaN, else infinity. */
+        if ( ( sigHigh | sigLow ) != 0 ) {
+            return commonNaNToFloat64( float128ToCommonNaN( a ) );
+        }
+        return packFloat64( sign, 0x7FF, 0 );
+    }
+    /* Move the fraction up 14 bits and fold what remains in the low word */
+    /* into a sticky bit. */
+    shortShift128Left( sigHigh, sigLow, 14, &sigHigh, &sigLow );
+    sigHigh |= ( sigLow != 0 );
+    if ( expo || sigHigh ) {
+        /* Nonzero input: set the leading bit and rebias the exponent. */
+        sigHigh |= LIT64( 0x4000000000000000 );
+        expo -= 0x3C01;
+    }
+    return roundAndPackFloat64( sign, expo, sigHigh );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the extended double-precision floating-point format. The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float128_to_floatx80( float128 a )
+{
+    flag sign;
+    int32 expo;
+    bits64 sigHigh, sigLow;
+
+    sigLow = extractFloat128Frac1( a );
+    sigHigh = extractFloat128Frac0( a );
+    expo = extractFloat128Exp( a );
+    sign = extractFloat128Sign( a );
+    if ( expo == 0x7FFF ) {
+        /* Maximum exponent: a nonzero fraction is a NaN, else infinity. */
+        if ( ( sigHigh | sigLow ) != 0 ) {
+            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
+        }
+        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( expo != 0 ) {
+        /* Nonzero exponent field: insert the implicit leading bit. */
+        sigHigh |= LIT64( 0x0001000000000000 );
+    }
+    else {
+        /* Zero exponent field: either a signed zero or a subnormal that */
+        /* must be normalized first. */
+        if ( ( sigHigh | sigLow ) == 0 ) {
+            return packFloatx80( sign, 0, 0 );
+        }
+        normalizeFloat128Subnormal( sigHigh, sigLow, &expo, &sigHigh, &sigLow );
+    }
+    shortShift128Left( sigHigh, sigLow, 15, &sigHigh, &sigLow );
+    return roundAndPackFloatx80( 80, sign, expo, sigHigh, sigLow );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the quadruple-precision floating-point value `a' to an integer, and
+returns the result as a quadruple-precision floating-point value. The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_round_to_int( float128 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float128 z;
+
+    aExp = extractFloat128Exp( a );
+    if ( 0x402F <= aExp ) {
+        /* Rounding position, if any, lies in the low 64-bit word. */
+        if ( 0x406F <= aExp ) {
+            /* No rounding is applied at this exponent or above; NaNs are */
+            /* propagated and everything else is returned unchanged. */
+            if (    ( aExp == 0x7FFF )
+                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+               ) {
+                return propagateFloat128NaN( a, a );
+            }
+            return a;
+        }
+        lastBitMask = 1;
+        /* Shift in two steps so the total count can reach 64, which */
+        /* leaves lastBitMask == 0 (handled separately below). */
+        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+        roundBitsMask = lastBitMask - 1;
+        z = a;
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            if ( lastBitMask ) {
+                /* Add half of the last kept bit; on an exact tie clear */
+                /* that bit to round to even. */
+                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+            }
+            else {
+                /* The last kept bit is bit 0 of z.high and the whole low */
+                /* word is discarded. */
+                if ( (sbits64) z.low < 0 ) {
+                    ++z.high;
+                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+                }
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {
+            /* Directed rounding: bump the magnitude unless the sign and */
+            /* the round-up mode coincide. */
+            if (   extractFloat128Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {
+                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+            }
+        }
+        z.low &= ~ roundBitsMask;
+    }
+    else {
+        if ( aExp < 0x3FFF ) {
+            /* Exponent below 0x3FFF: zeros are returned as-is; anything */
+            /* else is inexact and becomes a zero or a value packed with */
+            /* exponent 0x3FFF, depending on the rounding mode. */
+            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+            set_float_exception_inexact_flag();
+            aSign = extractFloat128Sign( a );
+            switch ( float_rounding_mode ) {
+             case float_round_nearest_even:
+                if (    ( aExp == 0x3FFE )
+                     && (   extractFloat128Frac0( a )
+                          | extractFloat128Frac1( a ) )
+                   ) {
+                    return packFloat128( aSign, 0x3FFF, 0, 0 );
+                }
+                break;
+             case float_round_to_zero:
+                break;
+             case float_round_down:
+                return
+                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+                    : packFloat128( 0, 0, 0, 0 );
+             case float_round_up:
+                return
+                      aSign ? packFloat128( 1, 0, 0, 0 )
+                    : packFloat128( 0, 0x3FFF, 0, 0 );
+            }
+            return packFloat128( aSign, 0, 0, 0 );
+        }
+        /* Rounding position lies in the high word; the low word of the */
+        /* result is always zero. */
+        lastBitMask = 1;
+        lastBitMask <<= 0x402F - aExp;
+        roundBitsMask = lastBitMask - 1;
+        z.low = 0;
+        z.high = a.high;
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            z.high += lastBitMask>>1;
+            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+                z.high &= ~ lastBitMask;
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {
+            if (   extractFloat128Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {
+                /* Any nonzero low word counts as a discarded bit. */
+                z.high |= ( a.low != 0 );
+                z.high += roundBitsMask;
+            }
+        }
+        z.high &= ~ roundBitsMask;
+    }
+    /* Flag inexact whenever rounding changed the value. */
+    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+        set_float_exception_inexact_flag();
+    }
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the quadruple-precision
+floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+before being returned. `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int32 expDiff;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        /* `a' has the larger exponent. */
+        if ( aExp == 0x7FFF ) {
+            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            /* No implicit bit for a zero exponent field; align as if */
+            /* the exponent were 1. */
+            --expDiff;
+        }
+        else {
+            bSig0 |= LIT64( 0x0001000000000000 );
+        }
+        /* Align `b'; bits shifted out are collected in zSig2. */
+        shift128ExtraRightJamming(
+            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        /* `b' has the larger exponent. */
+        if ( bExp == 0x7FFF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+            return packFloat128( zSign, 0x7FFF, 0, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig0 |= LIT64( 0x0001000000000000 );
+        }
+        /* Align `a'; bits shifted out are collected in zSig2. */
+        shift128ExtraRightJamming(
+            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+        zExp = bExp;
+    }
+    else {
+        /* Equal exponents: no alignment shift is needed. */
+        if ( aExp == 0x7FFF ) {
+            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+                return propagateFloat128NaN( a, b );
+            }
+            return a;
+        }
+        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+        /* Both exponent fields zero: the raw sum is packed directly. */
+        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+        zSig2 = 0;
+        /* Accounts for the two implicit leading bits that were not added. */
+        zSig0 |= LIT64( 0x0002000000000000 );
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    /* Unequal exponents: a single implicit-bit value is OR-ed into aSig0 */
+    /* before the add, for whichever operand has the larger exponent. */
+    aSig0 |= LIT64( 0x0001000000000000 );
+    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    --zExp;
+    /* No carry into bit 49: the sum is already positioned for rounding. */
+    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+    ++zExp;
+ shiftRight1:
+    shift128ExtraRightJamming(
+        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result
of subtracting the absolute values of the quadruple-
+precision floating-point values `a' and `b'. If `zSign' is 1, the
+difference is negated before being returned. `zSign' is ignored if the
+result is a NaN. The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    int32 expDiff;
+    float128 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    expDiff = aExp - bExp;
+    /* Work with both fractions shifted left 14 bits; the final exponent */
+    /* is reduced by 14 to compensate. */
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    /* From here on both exponent fields are equal. */
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+            return propagateFloat128NaN( a, b );
+        }
+        /* Both have the maximum exponent and zero fractions: invalid. */
+        float_raise( float_flag_invalid );
+        z.low = float128_default_nan_low;
+        z.high = float128_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig0 < aSig0 ) goto aBigger;
+    if ( aSig0 < bSig0 ) goto bBigger;
+    if ( bSig1 < aSig1 ) goto aBigger;
+    if ( aSig1 < bSig1 ) goto bBigger;
+    /* Equal magnitudes: the zero result is negative only when rounding down. */
+    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        /* Implicit leading bit, at its position after the 14-bit shift. */
+        aSig0 |= LIT64( 0x4000000000000000 );
+    }
+    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+    bSig0 |= LIT64( 0x4000000000000000 );
+ bBigger:
+    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    /* |b| > |a|, so the difference takes the opposite sign. */
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig0 |= LIT64( 0x4000000000000000 );
+    }
+    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+    aSig0 |= LIT64( 0x4000000000000000 );
+ aBigger:
+    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the quadruple-precision floating-point values
+`a' and `b'. The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_add( float128 a, float128 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    /* Like signs add magnitudes; unlike signs subtract them. */
+    if ( aSign == bSign ) {
+        return addFloat128Sigs( a, b, aSign );
+    }
+    else {
+        return subFloat128Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the quadruple-precision floating-point
+values `a' and `b'. The operation is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_sub( float128 a, float128 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    /* Like signs subtract magnitudes; unlike signs add them. */
+    if ( aSign == bSign ) {
+        return subFloat128Sigs( a, b, aSign );
+    }
+    else {
+        return addFloat128Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the quadruple-precision floating-point
+values `a' and `b'.
The operation is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_mul( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b ); + } + if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + zExp = aExp + bExp - 0x4000; + aSig0 |= LIT64( 0x0001000000000000 ); + shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); + mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); + add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zSig2 |= ( zSig3 != 0 ); + if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + 
++zExp; + } + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the quadruple-precision floating-point value +`a' by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_div( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + goto invalid; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign, 0, 0, 0 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero ); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = aExp - bExp + 0x3FFD; + 
shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { + shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); + mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); + sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); + if ( ( zSig1 & 0x3FFF ) <= 4 ) { + mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the quadruple-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float128 float128_rem( float128 a, float128 b ) +{ + flag aSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; + bits64 allZero, alternateASig0, alternateASig1, sigMean1; + sbits64 sigMean0; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), + aSig1, + 15 - ( expDiff < 0 ), + &aSig0, + &aSig1 + ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + q = le128( bSig0, bSig1, aSig0, aSig1 ); + if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? 
q - 4 : 0; + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); + shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); + sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 61; + } + if ( -64 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + expDiff += 52; + if ( expDiff < 0 ) { + shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + } + do { + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (sbits64) aSig0 ); + add128( + aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (sbits64) aSig0 < 0 ); + if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); + return + normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the quadruple-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float128 float128_sqrt( float128 a ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = (int32) ( (aExp - 0x3FFF) >> 1) + 0x3FFE; + aSig0 |= LIT64( 0x0001000000000000 ); + zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17)); + shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); + doubleZSig0 = zSig0<<1; + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & 0x1FFF ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( doubleZSig0, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift128Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add192( rem1, rem2, rem3, 0, term2, 
term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_eq( float128 a, float128 b ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float128_le( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_lt( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. 
The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_eq_signaling( float128 a, float128 b ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_le_quiet( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? 
le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_lt_quiet( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + + +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) + +/* + * These two routines are not part of the original softfloat distribution. + * + * They are based on the corresponding conversions to integer but return + * unsigned numbers instead since these functions are required by GCC. + * + * Added by Mark Brinicombe 27/09/97 + * + * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15] + */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit unsigned integer format. 
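The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. If the conversion +overflows, the largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +uint32 float64_to_uint32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + uint32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + + if (aSign) { + float_raise( float_flag_invalid ); + return(0); + } + + if ( 0x41E < aExp ) { + float_raise( float_flag_invalid ); + return 0xffffffff; + } + else if ( aExp < 0x3FF ) { + if ( aExp || aSig ) set_float_exception_inexact_flag(); + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = (uint32)aSig; + if ( ( aSig<<shiftCount ) != savedASig ) { + set_float_exception_inexact_flag(); + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit unsigned integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. If the conversion +overflows, the largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +uint32 float32_to_uint32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + uint32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + + if (aSign) { + float_raise( float_flag_invalid ); + return(0); + } + if ( 0 < shiftCount ) { + float_raise( float_flag_invalid ); + return 0xFFFFFFFF; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) set_float_exception_inexact_flag(); + return 0; + } + aSig = ( aSig | 0x800000 )<<8; + z = aSig>>( - shiftCount ); + if ( aSig<<( shiftCount & 31 ) ) { + set_float_exception_inexact_flag(); + } + return z; + +} + +#endif diff --git a/StdLib/LibC/Softfloat/eqdf2.c b/StdLib/LibC/Softfloat/eqdf2.c new file mode 100644 index 0000000000..d76aae30e7 --- /dev/null +++ b/StdLib/LibC/Softfloat/eqdf2.c @@ -0,0 +1,38 @@ +/* $NetBSD: eqdf2.c,v 1.1 2000/06/06 08:15:02 bjh21 Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.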
+ */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: eqdf2.c,v 1.1 2000/06/06 08:15:02 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +flag __eqdf2(float64, float64); + +flag +__eqdf2(float64 a, float64 b) +{ + + /* libgcc1.c says !(a == b) */ + return !float64_eq(a, b); +} diff --git a/StdLib/LibC/Softfloat/eqsf2.c b/StdLib/LibC/Softfloat/eqsf2.c new file mode 100644 index 0000000000..973c85b4b0 --- /dev/null +++ b/StdLib/LibC/Softfloat/eqsf2.c @@ -0,0 +1,38 @@ +/* $NetBSD: eqsf2.c,v 1.1 2000/06/06 08:15:03 bjh21 Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: eqsf2.c,v 1.1 2000/06/06 08:15:03 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +flag __eqsf2(float32, float32); + +flag +__eqsf2(float32 a, float32 b) +{ + + /* libgcc1.c says !(a == b) */ + return !float32_eq(a, b); +} diff --git a/StdLib/LibC/Softfloat/eqtf2.c b/StdLib/LibC/Softfloat/eqtf2.c new file mode 100644 index 0000000000..6aa1d509f7 --- /dev/null +++ b/StdLib/LibC/Softfloat/eqtf2.c @@ -0,0 +1,40 @@ +/* $NetBSD: eqtf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain. + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: eqtf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#ifdef FLOAT128 +flag __eqtf2(float128, float128); + +flag +__eqtf2(float128 a, float128 b) +{ + + /* libgcc1.c says !(a == b) */ + return !float128_eq(a, b); +} +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/fpgetmask.c b/StdLib/LibC/Softfloat/fpgetmask.c new file mode 100644 index 0000000000..fe0f623696 --- /dev/null +++ b/StdLib/LibC/Softfloat/fpgetmask.c @@ -0,0 +1,55 @@ +/* $NetBSD: fpgetmask.c,v 1.4 2008/04/28 20:23:00 martin Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC.
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: fpgetmask.c,v 1.4 2008/04/28 20:23:00 martin Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(fpgetmask,_fpgetmask) +#endif + +fp_except +fpgetmask(void) +{ + + return float_exception_mask; +} diff --git a/StdLib/LibC/Softfloat/fpgetround.c b/StdLib/LibC/Softfloat/fpgetround.c new file mode 100644 index 0000000000..115ad50cec --- /dev/null +++ b/StdLib/LibC/Softfloat/fpgetround.c @@ -0,0 +1,55 @@ +/* $NetBSD: fpgetround.c,v 1.3 2008/04/28 20:23:00 martin Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: fpgetround.c,v 1.3 2008/04/28 20:23:00 martin Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(fpgetround,_fpgetround) +#endif + +fp_rnd +fpgetround(void) +{ + + return float_rounding_mode; +} diff --git a/StdLib/LibC/Softfloat/fpgetsticky.c b/StdLib/LibC/Softfloat/fpgetsticky.c new file mode 100644 index 0000000000..5d5070942f --- /dev/null +++ b/StdLib/LibC/Softfloat/fpgetsticky.c @@ -0,0 +1,55 @@ +/* $NetBSD: fpgetsticky.c,v 1.3 2008/04/28 20:23:00 martin Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A.
Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: fpgetsticky.c,v 1.3 2008/04/28 20:23:00 martin Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(fpgetsticky,_fpgetsticky) +#endif + +fp_except +fpgetsticky(void) +{ + + return float_exception_flags; +} diff --git a/StdLib/LibC/Softfloat/fpsetmask.c b/StdLib/LibC/Softfloat/fpsetmask.c new file mode 100644 index 0000000000..87aa8b73d7 --- /dev/null +++ b/StdLib/LibC/Softfloat/fpsetmask.c @@ -0,0 +1,60 @@ +/* $NetBSD: fpsetmask.c,v 1.5 2013/01/10 08:16:10 matt Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: fpsetmask.c,v 1.5 2013/01/10 08:16:10 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(fpsetmask,_fpsetmask) +#endif + /* fpsetmask: if set_float_exception_mask is defined as a macro, returns set_float_exception_mask(mask); otherwise stores mask in float_exception_mask and returns the value it held before. */ +fp_except +fpsetmask(fp_except mask) +{ +#ifdef set_float_exception_mask + return set_float_exception_mask(mask); +#else /* no macro hook defined: swap the variable directly */ + const fp_except old = float_exception_mask; + float_exception_mask = mask; + return old; +#endif +} diff --git a/StdLib/LibC/Softfloat/fpsetround.c b/StdLib/LibC/Softfloat/fpsetround.c new file mode 100644 index 0000000000..3e3d9eb5d0 --- /dev/null +++ b/StdLib/LibC/Softfloat/fpsetround.c @@ -0,0 +1,60 @@ +/* $NetBSD: fpsetround.c,v 1.4 2013/01/10 08:16:10 matt Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: fpsetround.c,v 1.4 2013/01/10 08:16:10 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(fpsetround,_fpsetround) +#endif + /* fpsetround: if set_float_rounding_mode is defined as a macro, returns set_float_rounding_mode(rnd_dir); otherwise stores rnd_dir in float_rounding_mode and returns the value it held before. */ +fp_rnd +fpsetround(fp_rnd rnd_dir) +{ +#ifdef set_float_rounding_mode + return set_float_rounding_mode(rnd_dir); +#else /* no macro hook defined: swap the variable directly */ + const fp_rnd old = float_rounding_mode; + float_rounding_mode = rnd_dir; + return old; +#endif +} diff --git a/StdLib/LibC/Softfloat/fpsetsticky.c b/StdLib/LibC/Softfloat/fpsetsticky.c new file mode 100644 index 0000000000..742706278c --- /dev/null +++ b/StdLib/LibC/Softfloat/fpsetsticky.c @@ -0,0 +1,60 @@ +/* $NetBSD: fpsetsticky.c,v 1.4 2013/01/10 08:16:10 matt Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved.
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: fpsetsticky.c,v 1.4 2013/01/10 08:16:10 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(fpsetsticky,_fpsetsticky) +#endif + /* fpsetsticky: if set_float_exception_flags is defined as a macro, returns set_float_exception_flags(except, 1) (NOTE(review): the meaning of the second argument 1 is not visible here; confirm against the macro); otherwise stores except in float_exception_flags and returns the value it held before. */ +fp_except +fpsetsticky(fp_except except) +{ +#ifdef set_float_exception_flags + return set_float_exception_flags(except, 1); +#else /* no macro hook defined: swap the variable directly */ + const fp_except old = float_exception_flags; + float_exception_flags = except; + return old; +#endif +} diff --git a/StdLib/LibC/Softfloat/gedf2.c b/StdLib/LibC/Softfloat/gedf2.c new file mode 100644 index 0000000000..b9e39f2ada --- /dev/null +++ b/StdLib/LibC/Softfloat/gedf2.c @@ -0,0 +1,38 @@ +/* $NetBSD: gedf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gedf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __gedf2(float64, float64); + /* __gedf2: returns float64_le(b, a) - 1, so the result is 0 when float64_le reports 1 and -1 when it reports 0 (assumes float64_le yields only 0 or 1 and that flag is signed; TODO confirm). */ +flag +__gedf2(float64 a, float64 b) +{ + + /* libgcc1.c says (a >= b) - 1 */ + return float64_le(b, a) - 1; +} diff --git a/StdLib/LibC/Softfloat/gesf2.c b/StdLib/LibC/Softfloat/gesf2.c new file mode 100644 index 0000000000..b68dd04332 --- /dev/null +++ b/StdLib/LibC/Softfloat/gesf2.c @@ -0,0 +1,38 @@ +/* $NetBSD: gesf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gesf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __gesf2(float32, float32); + /* __gesf2: returns float32_le(b, a) - 1, so the result is 0 when float32_le reports 1 and -1 when it reports 0 (assumes float32_le yields only 0 or 1 and that flag is signed; TODO confirm). */ +flag +__gesf2(float32 a, float32 b) +{ + + /* libgcc1.c says (a >= b) - 1 */ + return float32_le(b, a) - 1; +} diff --git a/StdLib/LibC/Softfloat/getf2.c b/StdLib/LibC/Softfloat/getf2.c new file mode 100644 index 0000000000..0800cc893e --- /dev/null +++ b/StdLib/LibC/Softfloat/getf2.c @@ -0,0 +1,40 @@ +/* $NetBSD: getf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: getf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOAT128 + +flag __getf2(float128, float128); + /* __getf2: returns float128_le(b, a) - 1, so the result is 0 when float128_le reports 1 and -1 when it reports 0 (assumes float128_le yields only 0 or 1 and that flag is signed; TODO confirm). Compiled only when FLOAT128 is defined. */ +flag +__getf2(float128 a, float128 b) +{ + + /* libgcc1.c says (a >= b) - 1 */ + return float128_le(b, a) - 1; +} + +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/gexf2.c b/StdLib/LibC/Softfloat/gexf2.c new file mode 100644 index 0000000000..e34158c8cd --- /dev/null +++ b/StdLib/LibC/Softfloat/gexf2.c @@ -0,0 +1,39 @@ +/* $NetBSD: gexf2.c,v 1.2 2004/09/27 10:16:24 he Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gexf2.c,v 1.2 2004/09/27 10:16:24 he Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOATX80 + +flag __gexf2(floatx80, floatx80); + /* __gexf2: returns floatx80_le(b, a) - 1, so the result is 0 when floatx80_le reports 1 and -1 when it reports 0 (assumes floatx80_le yields only 0 or 1 and that flag is signed; TODO confirm). Compiled only when FLOATX80 is defined. */ +flag +__gexf2(floatx80 a, floatx80 b) +{ + + /* libgcc1.c says (a >= b) - 1 */ + return floatx80_le(b, a) - 1; +} +#endif /* FLOATX80 */ diff --git a/StdLib/LibC/Softfloat/gtdf2.c b/StdLib/LibC/Softfloat/gtdf2.c new file mode 100644 index 0000000000..6c6db225cd --- /dev/null +++ b/StdLib/LibC/Softfloat/gtdf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: gtdf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gtdf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __gtdf2(float64, float64); + /* __gtdf2: returns the result of float64_lt(b, a) unchanged; a > b is evaluated as b < a by swapping the operands. */ +flag +__gtdf2(float64 a, float64 b) +{ + + /* libgcc1.c says a > b */ + return float64_lt(b, a); +} diff --git a/StdLib/LibC/Softfloat/gtsf2.c b/StdLib/LibC/Softfloat/gtsf2.c new file mode 100644 index 0000000000..7b18f3ca75 --- /dev/null +++ b/StdLib/LibC/Softfloat/gtsf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: gtsf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gtsf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __gtsf2(float32, float32); + /* __gtsf2: returns the result of float32_lt(b, a) unchanged; a > b is evaluated as b < a by swapping the operands. */ +flag +__gtsf2(float32 a, float32 b) +{ + + /* libgcc1.c says a > b */ + return float32_lt(b, a); +} diff --git a/StdLib/LibC/Softfloat/gttf2.c b/StdLib/LibC/Softfloat/gttf2.c new file mode 100644 index 0000000000..7fdefd8ee3 --- /dev/null +++ b/StdLib/LibC/Softfloat/gttf2.c @@ -0,0 +1,40 @@ +/* $NetBSD: gttf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gttf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOAT128 + +flag __gttf2(float128, float128); + /* __gttf2: returns the result of float128_lt(b, a) unchanged; a > b is evaluated as b < a by swapping the operands. Compiled only when FLOAT128 is defined. */ +flag +__gttf2(float128 a, float128 b) +{ + + /* libgcc1.c says a > b */ + return float128_lt(b, a); +} + +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/gtxf2.c b/StdLib/LibC/Softfloat/gtxf2.c new file mode 100644 index 0000000000..11e2211f3f --- /dev/null +++ b/StdLib/LibC/Softfloat/gtxf2.c @@ -0,0 +1,39 @@ +/* $NetBSD: gtxf2.c,v 1.2 2004/09/27 10:16:24 he Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: gtxf2.c,v 1.2 2004/09/27 10:16:24 he Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOATX80 + +flag __gtxf2(floatx80, floatx80); + /* __gtxf2: returns the result of floatx80_lt(b, a) unchanged; a > b is evaluated as b < a by swapping the operands. Compiled only when FLOATX80 is defined. */ +flag +__gtxf2(floatx80 a, floatx80 b) +{ + + /* libgcc1.c says a > b */ + return floatx80_lt(b, a); +} +#endif /* FLOATX80 */ diff --git a/StdLib/LibC/Softfloat/ledf2.c b/StdLib/LibC/Softfloat/ledf2.c new file mode 100644 index 0000000000..e6b910171b --- /dev/null +++ b/StdLib/LibC/Softfloat/ledf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: ledf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: ledf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __ledf2(float64, float64); + /* __ledf2: returns 1 - float64_le(a, b), so the result is 0 when float64_le reports 1 and 1 when it reports 0 (assumes float64_le yields only 0 or 1; TODO confirm). */ +flag +__ledf2(float64 a, float64 b) +{ + + /* libgcc1.c says 1 - (a <= b) */ + return 1 - float64_le(a, b); +} diff --git a/StdLib/LibC/Softfloat/lesf2.c b/StdLib/LibC/Softfloat/lesf2.c new file mode 100644 index 0000000000..b26202c9e6 --- /dev/null +++ b/StdLib/LibC/Softfloat/lesf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: lesf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: lesf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __lesf2(float32, float32); + /* __lesf2: returns 1 - float32_le(a, b), so the result is 0 when float32_le reports 1 and 1 when it reports 0 (assumes float32_le yields only 0 or 1; TODO confirm). */ +flag +__lesf2(float32 a, float32 b) +{ + + /* libgcc1.c says 1 - (a <= b) */ + return 1 - float32_le(a, b); +} diff --git a/StdLib/LibC/Softfloat/letf2.c b/StdLib/LibC/Softfloat/letf2.c new file mode 100644 index 0000000000..13695a1e49 --- /dev/null +++ b/StdLib/LibC/Softfloat/letf2.c @@ -0,0 +1,40 @@ +/* $NetBSD: letf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: letf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOAT128 + +flag __letf2(float128, float128); + /* __letf2: returns 1 - float128_le(a, b), so the result is 0 when float128_le reports 1 and 1 when it reports 0 (assumes float128_le yields only 0 or 1; TODO confirm). Compiled only when FLOAT128 is defined. */ +flag +__letf2(float128 a, float128 b) +{ + + /* libgcc1.c says 1 - (a <= b) */ + return 1 - float128_le(a, b); +} + +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/ltdf2.c b/StdLib/LibC/Softfloat/ltdf2.c new file mode 100644 index 0000000000..8c47648d37 --- /dev/null +++ b/StdLib/LibC/Softfloat/ltdf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: ltdf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: ltdf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __ltdf2(float64, float64); + /* __ltdf2: returns the arithmetic negation of float64_lt(a, b), so the result is -1 when float64_lt reports 1 and 0 when it reports 0 (assumes float64_lt yields only 0 or 1 and that flag is signed; TODO confirm). */ +flag +__ltdf2(float64 a, float64 b) +{ + + /* libgcc1.c says -(a < b) */ + return -float64_lt(a, b); +} diff --git a/StdLib/LibC/Softfloat/ltsf2.c b/StdLib/LibC/Softfloat/ltsf2.c new file mode 100644 index 0000000000..72081e9a2a --- /dev/null +++ b/StdLib/LibC/Softfloat/ltsf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: ltsf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: ltsf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __ltsf2(float32, float32); + /* __ltsf2: returns the arithmetic negation of float32_lt(a, b), so the result is -1 when float32_lt reports 1 and 0 when it reports 0 (assumes float32_lt yields only 0 or 1 and that flag is signed; TODO confirm). */ +flag +__ltsf2(float32 a, float32 b) +{ + + /* libgcc1.c says -(a < b) */ + return -float32_lt(a, b); +} diff --git a/StdLib/LibC/Softfloat/lttf2.c b/StdLib/LibC/Softfloat/lttf2.c new file mode 100644 index 0000000000..ba1306a5e2 --- /dev/null +++ b/StdLib/LibC/Softfloat/lttf2.c @@ -0,0 +1,40 @@ +/* $NetBSD: lttf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: lttf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOAT128 + +flag __lttf2(float128, float128); + /* __lttf2: returns the arithmetic negation of float128_lt(a, b), so the result is -1 when float128_lt reports 1 and 0 when it reports 0 (assumes float128_lt yields only 0 or 1 and that flag is signed; TODO confirm). Compiled only when FLOAT128 is defined. */ +flag +__lttf2(float128 a, float128 b) +{ + + /* libgcc1.c says -(a < b) */ + return -float128_lt(a, b); +} + +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/nedf2.c b/StdLib/LibC/Softfloat/nedf2.c new file mode 100644 index 0000000000..89bfcb1af7 --- /dev/null +++ b/StdLib/LibC/Softfloat/nedf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: nedf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: nedf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __nedf2(float64, float64); + /* __nedf2: returns the logical negation of float64_eq(a, b): 1 when float64_eq reports 0, and 0 for any nonzero result. */ +flag +__nedf2(float64 a, float64 b) +{ + + /* libgcc1.c says a != b */ + return !float64_eq(a, b); +} diff --git a/StdLib/LibC/Softfloat/negdf2.c b/StdLib/LibC/Softfloat/negdf2.c new file mode 100644 index 0000000000..926133be18 --- /dev/null +++ b/StdLib/LibC/Softfloat/negdf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: negdf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: negdf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +float64 __negdf2(float64); + /* __negdf2: returns a XORed with FLOAT64_MANGLE(0x8000000000000000ULL); presumably this toggles the sign bit of the float64 bit pattern, but FLOAT64_MANGLE is defined elsewhere (TODO confirm). */ +float64 +__negdf2(float64 a) +{ + + /* libgcc1.c says -a */ + return a ^ FLOAT64_MANGLE(0x8000000000000000ULL); +} diff --git a/StdLib/LibC/Softfloat/negsf2.c b/StdLib/LibC/Softfloat/negsf2.c new file mode 100644 index 0000000000..20054a4ab8 --- /dev/null +++ b/StdLib/LibC/Softfloat/negsf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: negsf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: negsf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +float32 __negsf2(float32); + /* __negsf2: returns a XORed with 0x80000000, which toggles bit 31 of the value (presumably the sign bit of the float32 bit pattern; confirm against the float32 typedef). */ +float32 +__negsf2(float32 a) +{ + + /* libgcc1.c says INTIFY(-a) */ + return a ^ 0x80000000; +} diff --git a/StdLib/LibC/Softfloat/negtf2.c b/StdLib/LibC/Softfloat/negtf2.c new file mode 100644 index 0000000000..65b19bd165 --- /dev/null +++ b/StdLib/LibC/Softfloat/negtf2.c @@ -0,0 +1,41 @@ +/* $NetBSD: negtf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: negtf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOAT128 + +float128 __negtf2(float128); + /* __negtf2: XORs the high member of the by-value argument a with FLOAT64_MANGLE(0x8000000000000000ULL) and returns the modified copy; presumably a sign-bit toggle (FLOAT64_MANGLE is defined elsewhere; TODO confirm). Compiled only when FLOAT128 is defined. */ +float128 +__negtf2(float128 a) +{ + + /* libgcc1.c says -a */ + a.high ^= FLOAT64_MANGLE(0x8000000000000000ULL); + return a; +} + +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/negxf2.c b/StdLib/LibC/Softfloat/negxf2.c new file mode 100644 index 0000000000..c640415be5 --- /dev/null +++ b/StdLib/LibC/Softfloat/negxf2.c @@ -0,0 +1,39 @@ +/* $NetBSD: negxf2.c,v 1.2 2004/09/27 10:16:24 he Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: negxf2.c,v 1.2 2004/09/27 10:16:24 he Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOATX80 + +floatx80 __negxf2(floatx80); + /* __negxf2: returns __mulxf3(a, __floatsixf(-1)), so the negation goes through a multiply call rather than a bit flip as in __negdf2-style code. NOTE(review): confirm this is the intended behaviour for NaN inputs and exception flags. Compiled only when FLOATX80 is defined. */ +floatx80 +__negxf2(floatx80 a) +{ + + /* libgcc1.c says -a */ + return __mulxf3(a,__floatsixf(-1)); +} +#endif /* FLOATX80 */ diff --git a/StdLib/LibC/Softfloat/nesf2.c b/StdLib/LibC/Softfloat/nesf2.c new file mode 100644 index 0000000000..40b160d12b --- /dev/null +++ b/StdLib/LibC/Softfloat/nesf2.c @@ -0,0 +1,36 @@ +/* $NetBSD: nesf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: nesf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +flag __nesf2(float32, float32); + /* __nesf2: returns the logical negation of float32_eq(a, b): 1 when float32_eq reports 0, and 0 for any nonzero result. */ +flag +__nesf2(float32 a, float32 b) +{ + + /* libgcc1.c says a != b */ + return !float32_eq(a, b); +} diff --git a/StdLib/LibC/Softfloat/netf2.c b/StdLib/LibC/Softfloat/netf2.c new file mode 100644 index 0000000000..48cd0be5fc --- /dev/null +++ b/StdLib/LibC/Softfloat/netf2.c @@ -0,0 +1,40 @@ +/* $NetBSD: netf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Matt Thomas, 2011. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include /* NOTE(review): header name missing after #include; restore before building */ +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: netf2.c,v 1.1 2011/01/17 10:08:35 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOAT128 + +flag __netf2(float128, float128); + /* __netf2: returns the logical negation of float128_eq(a, b): 1 when float128_eq reports 0, and 0 for any nonzero result. Compiled only when FLOAT128 is defined. */ +flag +__netf2(float128 a, float128 b) +{ + + /* libgcc1.c says a != b */ + return !float128_eq(a, b); +} + +#endif /* FLOAT128 */ diff --git a/StdLib/LibC/Softfloat/nexf2.c b/StdLib/LibC/Softfloat/nexf2.c new file mode 100644 index 0000000000..d9ecc73673 --- /dev/null +++ b/StdLib/LibC/Softfloat/nexf2.c @@ -0,0 +1,39 @@ +/* $NetBSD: nexf2.c,v 1.2 2004/09/27 10:16:24 he Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved.
+* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: nexf2.c,v 1.2 2004/09/27 10:16:24 he Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef FLOATX80 + +flag __nexf2(floatx80, floatx80); + +flag +__nexf2(floatx80 a, floatx80 b) +{ + + /* Extended-precision "not equal". libgcc1.c says a != b; nonzero whenever floatx80_eq reports the operands unequal. */ + return !floatx80_eq(a, b); +} +#endif /* FLOATX80 */ diff --git a/StdLib/LibC/Softfloat/softfloat-for-gcc.h b/StdLib/LibC/Softfloat/softfloat-for-gcc.h new file mode 100644 index 0000000000..420cecc298 --- /dev/null +++ b/StdLib/LibC/Softfloat/softfloat-for-gcc.h @@ -0,0 +1,242 @@ +/* $NetBSD: softfloat-for-gcc.h,v 1.12 2013/08/01 23:21:19 matt Exp $ */ +/*- + * Copyright (c) 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC.
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Move private identifiers with external linkage into implementation + * namespace. -- Klaus Klein, May 5, 1999 + */ +#define float_exception_flags _softfloat_float_exception_flags +#define float_exception_mask _softfloat_float_exception_mask +#define float_rounding_mode _softfloat_float_rounding_mode +#define float_raise _softfloat_float_raise + +/* The following batch are called by GCC through wrappers (e.g. __nesf2 calls float32_eq), so they only get implementation-namespace names */ +#define float32_eq _softfloat_float32_eq +#define float32_le _softfloat_float32_le +#define float32_lt _softfloat_float32_lt +#define float64_eq _softfloat_float64_eq +#define float64_le _softfloat_float64_le +#define float64_lt _softfloat_float64_lt +#define float128_eq _softfloat_float128_eq +#define float128_le _softfloat_float128_le +#define float128_lt _softfloat_float128_lt + +/* + * Macros to define functions with the GCC expected names + */ + +#define float32_add __addsf3 +#define float64_add __adddf3 +#define floatx80_add __addxf3 +#define float128_add __addtf3 + +#define float32_sub __subsf3 +#define float64_sub __subdf3 +#define floatx80_sub __subxf3 +#define float128_sub __subtf3 + +#define float32_mul __mulsf3 +#define float64_mul __muldf3 +#define floatx80_mul __mulxf3 +#define float128_mul __multf3 +
+#define float32_div __divsf3 +#define float64_div __divdf3 +#define floatx80_div __divxf3 +#define float128_div __divtf3 + +#if 0 /* disabled: the __neg*2 entry points live in separate wrapper files (e.g. negxf2.c) */ +#define float32_neg __negsf2 +#define float64_neg __negdf2 +#define floatx80_neg __negxf2 +#define float128_neg __negtf2 +#endif + +#define int32_to_float32 __floatsisf +#define int32_to_float64 __floatsidf +#define int32_to_floatx80 __floatsixf +#define int32_to_float128 __floatsitf + +#define int64_to_float32 __floatdisf +#define int64_to_float64 __floatdidf +#define int64_to_floatx80 __floatdixf +#define int64_to_float128 __floatditf + +#define int128_to_float32 __floattisf +#define int128_to_float64 __floattidf +#define int128_to_floatx80 __floattixf +#define int128_to_float128 __floattitf + +#define uint32_to_float32 __floatunsisf +#define uint32_to_float64 __floatunsidf +#define uint32_to_floatx80 __floatunsixf +#define uint32_to_float128 __floatunsitf + +#define uint64_to_float32 __floatundisf +#define uint64_to_float64 __floatundidf +#define uint64_to_floatx80 __floatundixf +#define uint64_to_float128 __floatunditf + +#define uint128_to_float32 __floatuntisf +#define uint128_to_float64 __floatuntidf +#define uint128_to_floatx80 __floatuntixf +#define uint128_to_float128 __floatuntitf + +#define float32_to_int32_round_to_zero __fixsfsi +#define float64_to_int32_round_to_zero __fixdfsi +#define floatx80_to_int32_round_to_zero __fixxfsi +#define float128_to_int32_round_to_zero __fixtfsi + +#define float32_to_int64_round_to_zero __fixsfdi +#define float64_to_int64_round_to_zero __fixdfdi +#define floatx80_to_int64_round_to_zero __fixxfdi +#define float128_to_int64_round_to_zero __fixtfdi + +#define float32_to_int128_round_to_zero __fixsfti +#define float64_to_int128_round_to_zero __fixdfti +#define floatx80_to_int128_round_to_zero __fixxfti +#define float128_to_int128_round_to_zero __fixtfti + +#define float32_to_uint32_round_to_zero __fixunssfsi +#define float64_to_uint32_round_to_zero __fixunsdfsi +#define
floatx80_to_uint32_round_to_zero __fixunsxfsi +#define float128_to_uint32_round_to_zero __fixunstfsi + +#define float32_to_uint64_round_to_zero __fixunssfdi +#define float64_to_uint64_round_to_zero __fixunsdfdi +#define floatx80_to_uint64_round_to_zero __fixunsxfdi +#define float128_to_uint64_round_to_zero __fixunstfdi + +#define float32_to_uint128_round_to_zero __fixunssfti +#define float64_to_uint128_round_to_zero __fixunsdfti +#define floatx80_to_uint128_round_to_zero __fixunsxfti +#define float128_to_uint128_round_to_zero __fixunstfti + +#define float32_to_float64 __extendsfdf2 +#define float32_to_floatx80 __extendsfxf2 +#define float32_to_float128 __extendsftf2 +#define float64_to_floatx80 __extenddfxf2 +#define float64_to_float128 __extenddftf2 + +#define float128_to_float64 __trunctfdf2 +#define floatx80_to_float64 __truncxfdf2 +#define float128_to_float32 __trunctfsf2 +#define floatx80_to_float32 __truncxfsf2 +#define float64_to_float32 __truncdfsf2 + +#if 0 /* disabled: float32 comparisons reach GCC through wrapper files (e.g. nesf2.c) */ +#define float32_cmp __cmpsf2 +#define float32_unord __unordsf2 +#define float32_eq __eqsf2 +#define float32_ne __nesf2 +#define float32_ge __gesf2 +#define float32_lt __ltsf2 +#define float32_le __lesf2 +#define float32_gt __gtsf2 +#endif + +#if 0 /* disabled, like the float32 comparison renames */ +#define float64_cmp __cmpdf2 +#define float64_unord __unorddf2 +#define float64_eq __eqdf2 +#define float64_ne __nedf2 +#define float64_ge __gedf2 +#define float64_lt __ltdf2 +#define float64_le __ledf2 +#define float64_gt __gtdf2 +#endif + +/* XXX not in libgcc */ +#if 1 +#define floatx80_cmp __cmpxf2 +#define floatx80_unord __unordxf2 +#define floatx80_eq __eqxf2 +#define floatx80_ne __nexf2 +#define floatx80_ge __gexf2 +#define floatx80_lt __ltxf2 +#define floatx80_le __lexf2 +#define floatx80_gt __gtxf2 +#endif + +#if 0 /* disabled: float128 comparisons reach GCC through wrapper files (e.g. netf2.c) */ +#define float128_cmp __cmptf2 +#define float128_unord __unordtf2 +#define float128_eq __eqtf2 +#define float128_ne __netf2 +#define float128_ge __getf2 +#define float128_lt __lttf2 +#define float128_le __letf2 +#define float128_gt
__gttf2 +#endif + +#ifdef __ARM_EABI__ /* rename the GCC-style entry points to the ARM EABI run-time helper names */ +#ifdef __ARM_PCS_VFP +#include <arm/aeabi.h> /* NOTE(review): header name restored from upstream NetBSD - confirm */ +#endif +#define __addsf3 __aeabi_fadd +#define __adddf3 __aeabi_dadd + +#define __subsf3 __aeabi_fsub +#define __subdf3 __aeabi_dsub + +#define __mulsf3 __aeabi_fmul +#define __muldf3 __aeabi_dmul + +#define __divsf3 __aeabi_fdiv +#define __divdf3 __aeabi_ddiv + +#define __floatsisf __aeabi_i2f +#define __floatsidf __aeabi_i2d + +#define __floatdisf __aeabi_l2f +#define __floatdidf __aeabi_l2d + +#define __floatunsisf __aeabi_ui2f +#define __floatunsidf __aeabi_ui2d + +#define __floatundisf __aeabi_ul2f +#define __floatundidf __aeabi_ul2d + +#define __fixsfsi __aeabi_f2iz +#define __fixdfsi __aeabi_d2iz + +#define __fixsfdi __aeabi_f2lz +#define __fixdfdi __aeabi_d2lz + +#define __fixunssfsi __aeabi_f2uiz +#define __fixunsdfsi __aeabi_d2uiz + +#define __fixunssfdi __aeabi_f2ulz +#define __fixunsdfdi __aeabi_d2ulz + +#define __extendsfdf2 __aeabi_f2d +#define __truncdfsf2 __aeabi_d2f + +#endif /* __ARM_EABI__ */ diff --git a/StdLib/LibC/Softfloat/softfloat-history.txt b/StdLib/LibC/Softfloat/softfloat-history.txt new file mode 100644 index 0000000000..5e2732a8fa --- /dev/null +++ b/StdLib/LibC/Softfloat/softfloat-history.txt @@ -0,0 +1,52 @@ +$NetBSD: softfloat-history.txt,v 1.1 2000/06/06 08:15:08 bjh21 Exp $ + +History of Major Changes to SoftFloat, up to Release 2a + +John R. Hauser +1998 December 16 + + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Release 2a (1998 December) + +-- Added functions to convert between 64-bit integers (int64) and all + supported floating-point formats. + +-- Fixed a bug in all 64-bit-version square root functions except + `float32_sqrt' that caused the result sometimes to be off by 1 unit in + the last place (1 ulp) from what it should be. (Bug discovered by Paul + Donahue.) + +-- Improved the makefiles.
+ +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Release 2 (1997 June) + +-- Created the 64-bit (bits64) version, adding the floatx80 and float128 + formats. + +-- Changed the source directory structure, splitting the sources into a + `bits32' and a `bits64' version. Renamed `environment.h' to `milieu.h' + (to avoid confusion with environment variables). + +-- Fixed a small error that caused `float64_round_to_int' often to round the + wrong way in nearest/even mode when the operand was between 2^20 and 2^21 + and halfway between two integers. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Release 1a (1996 July) + +-- Corrected a mistake that caused borderline underflow cases not to raise + the underflow flag when they should have. (Problem reported by Doug + Priest.) + +-- Added the `float_detect_tininess' variable to control whether tininess is + detected before or after rounding. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Release 1 (1996 July) + +-- Original release. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/StdLib/LibC/Softfloat/softfloat-source.txt b/StdLib/LibC/Softfloat/softfloat-source.txt new file mode 100644 index 0000000000..e77f77a1d0 --- /dev/null +++ b/StdLib/LibC/Softfloat/softfloat-source.txt @@ -0,0 +1,383 @@ +$NetBSD: softfloat-source.txt,v 1.2 2006/11/24 19:46:58 christos Exp $ + +SoftFloat Release 2a Source Documentation + +John R. Hauser +1998 December 14 + + +------------------------------------------------------------------------------- +Introduction + +SoftFloat is a software implementation of floating-point that conforms to +the IEC/IEEE Standard for Binary Floating-Point Arithmetic. SoftFloat can +support four floating-point formats: single precision, double precision, +extended double precision, and quadruple precision. 
All operations required +by the IEEE Standard are implemented, except for conversions to and from +decimal. SoftFloat is distributed in the form of C source code, so a +C compiler is needed to compile the code. Support for the extended double- +precision and quadruple-precision formats is dependent on the C compiler +implementing a 64-bit integer type. + +This document gives information needed for compiling and/or porting +SoftFloat. + +The source code for SoftFloat is intended to be relatively machine- +independent and should be compilable using any ISO/ANSI C compiler. At the +time of this writing, SoftFloat has been successfully compiled with the GNU +C Compiler (`gcc') for several platforms. + + +------------------------------------------------------------------------------- +Limitations + +SoftFloat as written requires an ISO/ANSI-style C compiler. No attempt has +been made to accommodate compilers that are not ISO-conformant. Older ``K&R- +style'' compilers are not adequate for compiling SoftFloat. All testing I +have done so far has been with the GNU C Compiler. Compilation with other +compilers should be possible but has not been tested. + +The SoftFloat sources assume that source code file names can be longer than +8 characters. In order to compile under an MS-DOS-type system, many of the +source files will need to be renamed, and the source and makefiles edited +appropriately. Once compiled, the SoftFloat binary does not depend on the +existence of long file names. + +The underlying machine is assumed to be binary with a word size that is a +power of 2. Bytes are 8 bits. Support for the extended double-precision +and quadruple-precision formats depends on the C compiler implementing +a 64-bit integer type. If the largest integer type supported by the +C compiler is 32 bits, SoftFloat is limited to the single- and double- +precision formats. 
+ + +------------------------------------------------------------------------------- +Contents + + Introduction + Limitations + Contents + Legal Notice + SoftFloat Source Directory Structure + SoftFloat Source Files + processors/*.h + softfloat/bits*/*/softfloat.h + softfloat/bits*/*/milieu.h + softfloat/bits*/*/softfloat-specialize + softfloat/bits*/softfloat-macros + softfloat/bits*/softfloat.c + Steps to Creating a `softfloat.o' + Making `softfloat.o' a Library + Testing SoftFloat + Timing SoftFloat + Compiler Options and Efficiency + Processor-Specific Optimization of `softfloat.c' Using `softfloat-macros' + Contact Information + + + +------------------------------------------------------------------------------- +Legal Notice + +SoftFloat was written by John R. Hauser. This work was made possible in +part by the International Computer Science Institute, located at Suite 600, +1947 Center Street, Berkeley, California 94704. Funding was partially +provided by the National Science Foundation under grant MIP-9311980. The +original version of this code was written as part of a project to build +a fixed-point vector processor in collaboration with the University of +California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + + +------------------------------------------------------------------------------- +SoftFloat Source Directory Structure + +Because SoftFloat is targeted to multiple platforms, its source code +is slightly scattered between target-specific and target-independent +directories and files. 
The directory structure is as follows: + + processors + softfloat + bits64 + templates + 386-Win32-gcc + SPARC-Solaris-gcc + bits32 + templates + 386-Win32-gcc + SPARC-Solaris-gcc + +The two topmost directories and their contents are: + + softfloat - Most of the source code needed for SoftFloat. + processors - Target-specific header files that are not specific to + SoftFloat. + +The `softfloat' directory is further split into two parts: + + bits64 - SoftFloat implementation using 64-bit integers. + bits32 - SoftFloat implementation using only 32-bit integers. + +Within these directories are subdirectories for each of the targeted +platforms. The SoftFloat source code is distributed with targets +`386-Win32-gcc' and `SPARC-Solaris-gcc' (and perhaps others) already +prepared for both the 32-bit and 64-bit implementations. Source files that +are not within these target-specific subdirectories are intended to be +target-independent. + +The naming convention used for the target-specific directories is +`<processor>-<executable-type>-<compiler>'. The names of the supplied +target directories should be interpreted as follows: + + <processor>: + 386 - Intel 386-compatible processor. + SPARC - SPARC processor (as used by Sun machines). + <executable-type>: + Win32 - Microsoft Win32 executable. + Solaris - Sun Solaris executable. + <compiler>: + gcc - GNU C Compiler. + +You do not need to maintain this convention if you do not want to. + +Alongside the supplied target-specific directories is a `templates' +directory containing a set of ``generic'' target-specific source files. A +new target directory can be created by copying the `templates' directory and +editing the files inside. (Complete instructions for porting SoftFloat to a +new target are in the section _Steps_to_Creating_a_`softfloat.o'_.) Note +that the `templates' directory will not work as a target directory without +some editing. To avoid confusion, it would be wise to refrain from editing +the files inside `templates' directly.
+ + +------------------------------------------------------------------------------- +SoftFloat Source Files + +The purpose of each source file is described below. In the following, +the `*' symbol is used in place of the name of a specific target, such as +`386-Win32-gcc' or `SPARC-Solaris-gcc', or in place of some other text, as +in `bits*' for either `bits32' or `bits64'. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +processors/*.h + +The target-specific `processors' header file defines integer types +of various sizes, and also defines certain C preprocessor macros that +characterize the target. The two examples supplied are `386-gcc.h' and +`SPARC-gcc.h'. The naming convention used for processor header files is +`<processor>-<compiler>.h'. + +If 64-bit integers are supported by the compiler, the macro name `BITS64' +should be defined here along with the corresponding 64-bit integer +types. In addition, the function-like macro `LIT64' must be defined for +constructing 64-bit integer literals (constants). The `LIT64' macro is used +consistently in the SoftFloat code to annotate 64-bit literals. + +If `BITS64' is not defined, only the 32-bit version of SoftFloat can be +compiled. If `BITS64' _is_ defined, either can be compiled. + +If an inlining attribute (such as an `inline' keyword) is provided by the +compiler, the macro `INLINE' should be defined to the appropriate keyword. +If not, `INLINE' can be set to the keyword `static'. The `INLINE' macro +appears in the SoftFloat source code before every function that should +be inlined by the compiler. SoftFloat depends on inlining to obtain +good speed. Even if inlining cannot be forced with a language keyword, +the compiler may still be able to perform inlining on its own as an +optimization. If a command-line option is needed to convince the compiler +to perform this optimization, this should be assured in the makefile. (See +the section _Compiler_Options_and_Efficiency_ below.)
+ +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +softfloat/bits*/*/softfloat.h + +The target-specific `softfloat.h' header file defines the SoftFloat +interface as seen by clients. + +Unlike the actual function definitions in `softfloat.c', the declarations +in `softfloat.h' do not use any of the types defined by the `processors' +header file. This is done so that clients will not have to include the +`processors' header file in order to use SoftFloat. Nevertheless, the +target-specific declarations in `softfloat.h' must match what `softfloat.c' +expects. For example, if `int32' is defined as `int' in the `processors' +header file, then in `softfloat.h' the output of `float32_to_int32' should +be stated as `int', although in `softfloat.c' it is given in target- +independent form as `int32'. + +For the `bits64' implementation of SoftFloat, the macro names `FLOATX80' and +`FLOAT128' must be defined in order for the extended double-precision and +quadruple-precision formats to be enabled in the code. Conversely, either +or both of the extended formats can be disabled by simply removing the +`#define' of the respective macro. When an extended format is not enabled, +none of the functions that either input or output the format are defined, +and no space is taken up in `softfloat.o' by such functions. There is no +provision for disabling the usual single- and double-precision formats. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +softfloat/bits*/*/milieu.h + +The target-specific `milieu.h' header file provides declarations that are +needed to compile SoftFloat. In addition, deviations from ISO/ANSI C by +the compiler (such as names not properly declared in system header files) +are corrected in this header if possible. 
+ +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +softfloat/bits*/*/softfloat-specialize + +This target-specific C source fragment defines: + +-- whether tininess for underflow is detected before or after rounding by + default; +-- what (if anything) special happens when exceptions are raised; +-- how signaling NaNs are distinguished from quiet NaNs; +-- the default generated quiet NaNs; and +-- how NaNs are propagated from function inputs to output. + +These details are not decided by the IEC/IEEE Standard. This fragment is +included verbatim within `softfloat.c' when SoftFloat is compiled. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +softfloat/bits*/softfloat-macros + +This target-independent C source fragment defines a number of arithmetic +functions used as primitives within the `softfloat.c' source. Most of the +functions defined here are intended to be inlined for efficiency. This +fragment is included verbatim within `softfloat.c' when SoftFloat is +compiled. + +Target-specific variations on this file are possible. See the section +_Processor-Specific_Optimization_of_`softfloat.c'_Using_`softfloat-macros'_ +below. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +softfloat/bits*/softfloat.c + +The target-independent `softfloat.c' source file contains the body of the +SoftFloat implementation. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +The inclusion of the files above within each other (using `#include') can be +shown graphically as follows: + + softfloat/bits*/softfloat.c + softfloat/bits*/*/milieu.h + processors/*.h + softfloat/bits*/*/softfloat.h + softfloat/bits*/*/softfloat-specialize + softfloat/bits*/softfloat-macros + +Note in particular that `softfloat.c' does not include the `processors' +header file directly. 
Rather, `softfloat.c' includes the target-specific +`milieu.h' header file, which in turn includes the processor header file. + + +------------------------------------------------------------------------------- +Steps to Creating a `softfloat.o' + +Porting and/or compiling SoftFloat involves the following steps: + +1. If one does not already exist, create an appropriate `.h' file in the + `processors' directory. + +2. If `BITS64' is defined in the `processors' header file, choose whether + to compile the 32-bit or 64-bit implementation of SoftFloat. If + `BITS64' is not defined, your only choice is the 32-bit implementation. + The remaining steps occur within either the `bits32' or `bits64' + subdirectories. + +3. If one does not already exist, create an appropriate target-specific + subdirectory by copying the given `templates' directory. + +4. In the target-specific subdirectory, edit the files `softfloat-specialize' + and `softfloat.h' to define the desired exception handling functions + and mode control values. In the `softfloat.h' header file, ensure also + that all declarations give the proper target-specific type (such as + `int' or `long') corresponding to the target-independent type used in + `softfloat.c' (such as `int32'). None of the type names declared in the + `processors' header file should appear in `softfloat.h'. + +5. In the target-specific subdirectory, edit the files `milieu.h' and + `Makefile' to reflect the current environment. + +6. In the target-specific subdirectory, execute `make'. + +For the targets that are supplied, if the expected compiler is available +(usually `gcc'), it should only be necessary to execute `make' in the +target-specific subdirectory. + + +------------------------------------------------------------------------------- +Making `softfloat.o' a Library + +SoftFloat is not made into a software library by the supplied makefile. 
+If desired, `softfloat.o' can easily be put into its own library (in Unix, +`softfloat.a') using the usual system tool (in Unix, `ar'). + + +------------------------------------------------------------------------------- +Testing SoftFloat + +SoftFloat can be tested using the `testsoftfloat' program by the same +author. The `testsoftfloat' program is part of the TestFloat package +available at the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/ +TestFloat.html'. + + +------------------------------------------------------------------------------- +Timing SoftFloat + +A program called `timesoftfloat' for timing the SoftFloat functions is +included with the SoftFloat source code. Compiling `timesoftfloat' should +pose no difficulties once `softfloat.o' exists. The supplied makefile +will create a `timesoftfloat' executable by default after generating +`softfloat.o'. See `timesoftfloat.txt' for documentation about using +`timesoftfloat'. + + +------------------------------------------------------------------------------- +Compiler Options and Efficiency + +In order to get good speed with SoftFloat, it is important that the compiler +inline the routines that have been marked `INLINE' in the code. Even if +inlining cannot be forced by an appropriate definition of the `INLINE' +macro, the compiler may still be able to perform inlining on its own as +an optimization. In that case, the makefile should be edited to give the +compiler whatever option is required to cause it to inline small functions. + +The ability of the processor to do fast shifts has been assumed. Efficiency +will not be as good on processors for which this is not the case (such as +the original Motorola 68000 or Intel 8086 processors). 
+ + +------------------------------------------------------------------------------- +Processor-Specific Optimization of `softfloat.c' Using `softfloat-macros' + +The `softfloat-macros' source fragment defines arithmetic functions used +as primitives by `softfloat.c'. This file has been written in a target- +independent form. For a given target, it may be possible to improve on +these functions using target-specific and/or non-ISO-C features (such +as `asm' statements). For example, one of the ``macro'' functions takes +two word-size integers and returns their full product in two words. +This operation can be done directly in hardware on many processors; but +because it is not available through standard C, the function defined in +`softfloat-macros' uses four multiplies to achieve the same result. + +To address these shortcomings, a customized version of `softfloat-macros' +can be created in any of the target-specific subdirectories. A simple +modification to the target's makefile should be sufficient to ensure that +the custom version is used instead of the generic one. + + +------------------------------------------------------------------------------- +Contact Information + +At the time of this writing, the most up-to-date information about +SoftFloat and the latest release can be found at the Web page `http:// +HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'. + + diff --git a/StdLib/LibC/Softfloat/softfloat-specialize b/StdLib/LibC/Softfloat/softfloat-specialize new file mode 100644 index 0000000000..13ada988d1 --- /dev/null +++ b/StdLib/LibC/Softfloat/softfloat-specialize @@ -0,0 +1,529 @@ +/* $NetBSD: softfloat-specialize,v 1.8 2013/01/10 08:16:10 matt Exp $ */ + +/* This is a derivative work. */ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. 
This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include <signal.h> +#include <string.h> +#include <unistd.h> + +/* +------------------------------------------------------------------------------- +Underflow tininess-detection mode, statically initialized to default value. +(The declaration in `softfloat.h' must match the `int8' type here.) +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +int8 float_detect_tininess = float_tininess_after_rounding; + +/* +------------------------------------------------------------------------------- +Raises the exceptions specified by `flags'. Floating-point traps can be +defined here if desired.
It is currently not possible for such a trap to +substitute a result value. If traps are not implemented, this routine +should be simply `float_exception_flags |= flags;' (the variant compiled here). +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +#ifndef set_float_exception_mask +#define float_exception_mask _softfloat_float_exception_mask +#endif +#endif +#ifndef set_float_exception_mask +fp_except float_exception_mask = 0; +#endif +void +float_raise( fp_except flags ) +{ + +#if 0 // Don't raise exceptions: the SIGFPE delivery path below is compiled out + siginfo_t info; + fp_except mask = float_exception_mask; + +#ifdef set_float_exception_mask + flags |= set_float_exception_flags(flags, 0); +#else + float_exception_flags |= flags; + flags = float_exception_flags; +#endif + + flags &= mask; + if ( flags ) { + memset(&info, 0, sizeof info); + info.si_signo = SIGFPE; + info.si_pid = getpid(); + info.si_uid = geteuid(); + if (flags & float_flag_underflow) + info.si_code = FPE_FLTUND; + else if (flags & float_flag_overflow) + info.si_code = FPE_FLTOVF; + else if (flags & float_flag_divbyzero) + info.si_code = FPE_FLTDIV; + else if (flags & float_flag_invalid) + info.si_code = FPE_FLTINV; + else if (flags & float_flag_inexact) + info.si_code = FPE_FLTRES; + sigqueueinfo(getpid(), &info); + } +#else // Don't raise exceptions: only accumulate the flags + float_exception_flags |= flags; +#endif // Don't raise exceptions +} +#undef float_exception_mask + +/* +------------------------------------------------------------------------------- +Internal canonical NaN format: a sign flag plus the NaN fraction bits held in `high'/`low'. +------------------------------------------------------------------------------- +*/ +typedef struct { + flag sign; + bits64 high, low; +} commonNaNT; + +/* +------------------------------------------------------------------------------- +The pattern for a default generated single-precision NaN.
+------------------------------------------------------------------------------- +*/ +#define float32_default_nan 0xFFFFFFFF + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a NaN; +otherwise returns 0. The left shift discards the sign bit, so a pattern with an all-ones exponent and a nonzero fraction compares above 0xFF000000. +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +flag float32_is_nan( float32 a ) +{ + + return ( (bits32)0xFF000000 < (bits32) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. The test requires an all-ones exponent with bit 22 (the top fraction bit) clear and at least one lower fraction bit set. NOTE(review): the guard below spells the m68k macro SOFTFLOAT_M68K_FOR_GCC, whereas the float64 counterpart in this file uses SOFTFLOATM68K_FOR_GCC; confirm which spelling the build actually defines. +------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC) && \ + !defined(SOFTFLOAT_M68K_FOR_GCC) +static +#endif +flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. The 32-bit pattern is widened and shifted left by 41 so that fraction bit 22 lands in bit 63 of `high'; `low' is set to 0. +------------------------------------------------------------------------------- +*/ +static commonNaNT float32ToCommonNaN( float32 a ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>31; + z.low = 0; + z.high = ( (bits64) a )<<41; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the single- +precision floating-point format.
+------------------------------------------------------------------------------- +*/ +static float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | (bits32)( a.high>>41 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two single-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. Both operands have bit 22 (0x00400000) set before selection. `a' is returned when it is a NaN, unless `a' is signaling and `b' is also a NaN, in which case `b' is returned; when `a' is not a NaN, `b' is returned. +------------------------------------------------------------------------------- +*/ +static float32 propagateFloat32NaN( float32 a, float32 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +/* +------------------------------------------------------------------------------- +The pattern for a default generated double-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a NaN; +otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +flag float64_is_nan( float64 a ) +{ + + return ( (bits64)LIT64( 0xFFE0000000000000 ) < + (bits64) ( FLOAT64_DEMANGLE(a)<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. The test is applied to FLOAT64_DEMANGLE(a) and requires an all-ones exponent with bit 51 (the top fraction bit) clear and at least one lower fraction bit set. +------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC) && \ + !defined(SOFTFLOATM68K_FOR_GCC) +static +#endif +flag float64_is_signaling_nan( float64 a ) +{ + + return + ( ( ( FLOAT64_DEMANGLE(a)>>51 ) & 0xFFF ) == 0xFFE ) + && ( FLOAT64_DEMANGLE(a) & LIT64( 0x0007FFFFFFFFFFFF ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. The demangled pattern is shifted left by 12 so that fraction bit 51 lands in bit 63 of `high'; `low' is set to 0. +------------------------------------------------------------------------------- +*/ +static commonNaNT float64ToCommonNaN( float64 a ) +{ + commonNaNT z; + + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = (flag)(FLOAT64_DEMANGLE(a)>>63); + z.low = 0; + z.high = FLOAT64_DEMANGLE(a)<<12; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the double- +precision floating-point format.
+------------------------------------------------------------------------------- +*/ +static float64 commonNaNToFloat64( commonNaNT a ) +{ + + return FLOAT64_MANGLE( + ( ( (bits64) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ) ); + +} + +/* +------------------------------------------------------------------------------- +Takes two double-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. Both operands have FLOAT64_MANGLE(0x0008000000000000) ORed in before selection. `a' is returned when it is a NaN, unless `a' is signaling and `b' is also a NaN, in which case `b' is returned; when `a' is not a NaN, `b' is returned. +------------------------------------------------------------------------------- +*/ +static float64 propagateFloat64NaN( float64 a, float64 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 )); + b |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 )); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated extended double-precision NaN. The +`high' and `low' values hold the most- and least-significant bits, +respectively. +------------------------------------------------------------------------------- +*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +NaN; otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +flag floatx80_is_nan( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +signaling NaN; otherwise returns 0. `aLow' is `a.low' with bit 62 cleared; the value is signaling only when the exponent field is all ones, bit 62 of `a.low' is already clear (`a.low == aLow'), and at least one of bits 0..61 is set. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_signaling_nan( floatx80 a ) +{ + bits64 aLow; + + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( a.low == aLow ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +invalid exception is raised. `a.low' is shifted left by one, dropping bit 63, to form `high'; `low' is set to 0. +------------------------------------------------------------------------------- +*/ +static commonNaNT floatx80ToCommonNaN( floatx80 a ) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the extended +double-precision floating-point format. The top two bits of the result's `low' word (0xC000000000000000) are always set. +------------------------------------------------------------------------------- +*/ +static floatx80 commonNaNToFloatx80( commonNaNT a ) +{ + floatx80 z; + + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two extended double-precision floating-point values `a' and `b', one +of which is a NaN, and returns the appropriate NaN result.
If either `a' or +`b' is a signaling NaN, the invalid exception is raised. Both operands have the top two bits of `low' set before selection. `a' is returned when it is a NaN, unless `a' is signaling and `b' is also a NaN, in which case `b' is returned; when `a' is not a NaN, `b' is returned. +------------------------------------------------------------------------------- +*/ +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated quadruple-precision NaN. The `high' and +`low' values hold the most- and least-significant bits, respectively. +------------------------------------------------------------------------------- +*/ +#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) +#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a NaN; +otherwise returns 0. The first test checks for an all-ones exponent after the sign bit is shifted out; the second requires a nonzero fraction, either in `low' or in the low 48 bits of `high'. +------------------------------------------------------------------------------- +*/ +flag float128_is_nan( float128 a ) +{ + + return + ( (bits64)LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a +signaling NaN; otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +flag float128_is_signaling_nan( float128 a ) +{ + + return + ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) + && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. The 128-bit pattern is passed to shortShift128Left with a count of 16 to produce `z.high' and `z.low'. +------------------------------------------------------------------------------- +*/ +static commonNaNT float128ToCommonNaN( float128 a ) +{ + commonNaNT z; + + if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = (flag)(a.high>>63); + shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the quadruple- +precision floating-point format. The canonical pair is passed to shift128Right with a count of 16, then the sign bit and the constant 0x7FFF800000000000 (bits 62..47 of `high') are ORed in. +------------------------------------------------------------------------------- +*/ +static float128 commonNaNToFloat128( commonNaNT a ) +{ + float128 z; + + shift128Right( a.high, a.low, 16, &z.high, &z.low ); + z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two quadruple-precision floating-point values `a' and `b', one of +which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised.
+------------------------------------------------------------------------------- +*/ +static float128 propagateFloat128NaN( float128 a, float128 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float128_is_nan( a ); + aIsSignalingNaN = float128_is_signaling_nan( a ); + bIsNaN = float128_is_nan( b ); + bIsSignalingNaN = float128_is_signaling_nan( b ); + a.high |= LIT64( 0x0000800000000000 ); /* set bit 47, the bit float128_is_signaling_nan() requires clear */ + b.high |= LIT64( 0x0000800000000000 ); /* same for `b' */ + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; /* `b' only when `a' is signaling and `b' is also a NaN */ + } + else { + return b; + } + +} + +#endif + diff --git a/StdLib/LibC/Softfloat/softfloat.txt b/StdLib/LibC/Softfloat/softfloat.txt new file mode 100644 index 0000000000..c1463b2f30 --- /dev/null +++ b/StdLib/LibC/Softfloat/softfloat.txt @@ -0,0 +1,372 @@ +$NetBSD: softfloat.txt,v 1.2 2006/11/24 19:46:58 christos Exp $ + +SoftFloat Release 2a General Documentation + +John R. Hauser +1998 December 13 + + +------------------------------------------------------------------------------- +Introduction + +SoftFloat is a software implementation of floating-point that conforms to +the IEC/IEEE Standard for Binary Floating-Point Arithmetic. As many as four +formats are supported: single precision, double precision, extended double +precision, and quadruple precision. All operations required by the standard +are implemented, except for conversions to and from decimal. + +This document gives information about the types defined and the routines +implemented by SoftFloat. It does not attempt to define or explain the +IEC/IEEE Floating-Point Standard. Details about the standard are available +elsewhere. + + +------------------------------------------------------------------------------- +Limitations + +SoftFloat is written in C and is designed to work with other C code. The +SoftFloat header files assume an ISO/ANSI-style C compiler.
No attempt +has been made to accommodate compilers that are not ISO-conformant. In +particular, the distributed header files will not be acceptable to any +compiler that does not recognize function prototypes. + +Support for the extended double-precision and quadruple-precision formats +depends on a C compiler that implements 64-bit integer arithmetic. If the +largest integer format supported by the C compiler is 32 bits, SoftFloat is +limited to only single and double precisions. When that is the case, all +references in this document to the extended double precision, quadruple +precision, and 64-bit integers should be ignored. + + +------------------------------------------------------------------------------- +Contents + + Introduction + Limitations + Contents + Legal Notice + Types and Functions + Rounding Modes + Extended Double-Precision Rounding Precision + Exceptions and Exception Flags + Function Details + Conversion Functions + Standard Arithmetic Functions + Remainder Functions + Round-to-Integer Functions + Comparison Functions + Signaling NaN Test Functions + Raise-Exception Function + Contact Information + + + +------------------------------------------------------------------------------- +Legal Notice + +SoftFloat was written by John R. Hauser. This work was made possible in +part by the International Computer Science Institute, located at Suite 600, +1947 Center Street, Berkeley, California 94704. Funding was partially +provided by the National Science Foundation under grant MIP-9311980. The +original version of this code was written as part of a project to build +a fixed-point vector processor in collaboration with the University of +California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. 
USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + + +------------------------------------------------------------------------------- +Types and Functions + +When 64-bit integers are supported by the compiler, the `softfloat.h' header +file defines four types: `float32' (single precision), `float64' (double +precision), `floatx80' (extended double precision), and `float128' +(quadruple precision). The `float32' and `float64' types are defined in +terms of 32-bit and 64-bit integer types, respectively, while the `float128' +type is defined as a structure of two 64-bit integers, taking into account +the byte order of the particular machine being used. The `floatx80' type +is defined as a structure containing one 16-bit and one 64-bit integer, with +the machine's byte order again determining the order of the `high' and `low' +fields. + +When 64-bit integers are _not_ supported by the compiler, the `softfloat.h' +header file defines only two types: `float32' and `float64'. Because +ISO/ANSI C guarantees at least one built-in integer type of 32 bits, +the `float32' type is identified with an appropriate integer type. The +`float64' type is defined as a structure of two 32-bit integers, with the +machine's byte order determining the order of the fields. + +In either case, the types in `softfloat.h' are defined such that if a system +implements the usual C `float' and `double' types according to the IEC/IEEE +Standard, then the `float32' and `float64' types should be indistinguishable +in memory from the native `float' and `double' types. (On the other hand, +when `float32' or `float64' values are placed in processor registers by +the compiler, the type of registers used may differ from those used for the +native `float' and `double' types.) 
+ +SoftFloat implements the following arithmetic operations: + +-- Conversions among all the floating-point formats, and also between + integers (32-bit and 64-bit) and any of the floating-point formats. + +-- The usual add, subtract, multiply, divide, and square root operations + for all floating-point formats. + +-- For each format, the floating-point remainder operation defined by the + IEC/IEEE Standard. + +-- For each floating-point format, a ``round to integer'' operation that + rounds to the nearest integer value in the same format. (The floating- + point formats can hold integer values, of course.) + +-- Comparisons between two values in the same floating-point format. + +The only functions required by the IEC/IEEE Standard that are not provided +are conversions to and from decimal. + + +------------------------------------------------------------------------------- +Rounding Modes + +All four rounding modes prescribed by the IEC/IEEE Standard are implemented +for all operations that require rounding. The rounding mode is selected +by the global variable `float_rounding_mode'. This variable may be set +to one of the values `float_round_nearest_even', `float_round_to_zero', +`float_round_down', or `float_round_up'. The rounding mode is initialized +to nearest/even. + + +------------------------------------------------------------------------------- +Extended Double-Precision Rounding Precision + +For extended double precision (`floatx80') only, the rounding precision +of the standard arithmetic operations is controlled by the global variable +`floatx80_rounding_precision'. The operations affected are: + + floatx80_add floatx80_sub floatx80_mul floatx80_div floatx80_sqrt + +When `floatx80_rounding_precision' is set to its default value of 80, these +operations are rounded (as usual) to the full precision of the extended +double-precision format. 
Setting `floatx80_rounding_precision' to 32 +or to 64 causes the operations listed to be rounded to reduced precision +equivalent to single precision (`float32') or to double precision +(`float64'), respectively. When rounding to reduced precision, additional +bits in the result significand beyond the rounding point are set to zero. +The consequences of setting `floatx80_rounding_precision' to a value other +than 32, 64, or 80 are not specified. Operations other than the ones listed +above are not affected by `floatx80_rounding_precision'. + + +------------------------------------------------------------------------------- +Exceptions and Exception Flags + +All five exception flags required by the IEC/IEEE Standard are +implemented. Each flag is stored as a unique bit in the global variable +`float_exception_flags'. The positions of the exception flag bits within +this variable are determined by the bit masks `float_flag_inexact', +`float_flag_underflow', `float_flag_overflow', `float_flag_divbyzero', and +`float_flag_invalid'. The exception flags variable is initialized to all 0, +meaning no exceptions. + +An individual exception flag can be cleared with the statement + + float_exception_flags &= ~ float_flag_<exception>; + +where `<exception>' is the appropriate name. To raise a floating-point +exception, the SoftFloat function `float_raise' should be used (see below). + +In the terminology of the IEC/IEEE Standard, SoftFloat can detect tininess +for underflow either before or after rounding. The choice is made by +the global variable `float_detect_tininess', which can be set to either +`float_tininess_before_rounding' or `float_tininess_after_rounding'. +Detecting tininess after rounding is better because it results in fewer +spurious underflow signals. The other option is provided for compatibility +with some systems. Like most systems, SoftFloat always detects loss of +accuracy for underflow as an inexact result.
+ + +------------------------------------------------------------------------------- +Function Details + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Conversion Functions + +All conversions among the floating-point formats are supported, as are all +conversions between a floating-point format and 32-bit and 64-bit signed +integers. The complete set of conversion functions is: + + int32_to_float32 int64_to_float32 + int32_to_float64 int64_to_float64 + int32_to_floatx80 int64_to_floatx80 + int32_to_float128 int64_to_float128 + + float32_to_int32 float32_to_int64 + float64_to_int32 float64_to_int64 + floatx80_to_int32 floatx80_to_int64 + float128_to_int32 float128_to_int64 + + float32_to_float64 float32_to_floatx80 float32_to_float128 + float64_to_float32 float64_to_floatx80 float64_to_float128 + floatx80_to_float32 floatx80_to_float64 floatx80_to_float128 + float128_to_float32 float128_to_float64 float128_to_floatx80 + +Each conversion function takes one operand of the appropriate type and +returns one result. Conversions from a smaller to a larger floating-point +format are always exact and so require no rounding. Conversions from 32-bit +integers to double precision and larger formats are also exact, and likewise +for conversions from 64-bit integers to extended double and quadruple +precisions. + +Conversions from floating-point to integer raise the invalid exception if +the source value cannot be rounded to a representable integer of the desired +size (32 or 64 bits). If the floating-point operand is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as the operand is returned. + +On conversions to integer, if the floating-point operand is not already an +integer value, the operand is rounded according to the current rounding +mode as specified by `float_rounding_mode'.
Because C (and perhaps other +languages) require that conversions to integers be rounded toward zero, the +following functions are provided for improved speed and convenience: + + float32_to_int32_round_to_zero float32_to_int64_round_to_zero + float64_to_int32_round_to_zero float64_to_int64_round_to_zero + floatx80_to_int32_round_to_zero floatx80_to_int64_round_to_zero + float128_to_int32_round_to_zero float128_to_int64_round_to_zero + +These variant functions ignore `float_rounding_mode' and always round toward +zero. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Standard Arithmetic Functions + +The following standard arithmetic functions are provided: + + float32_add float32_sub float32_mul float32_div float32_sqrt + float64_add float64_sub float64_mul float64_div float64_sqrt + floatx80_add floatx80_sub floatx80_mul floatx80_div floatx80_sqrt + float128_add float128_sub float128_mul float128_div float128_sqrt + +Each function takes two operands, except for `sqrt' which takes only one. +The operands and result are all of the same type. + +Rounding of the extended double-precision (`floatx80') functions is affected +by the `floatx80_rounding_precision' variable, as explained above in the +section _Extended_Double-Precision_Rounding_Precision_. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Remainder Functions + +For each format, SoftFloat implements the remainder function according to +the IEC/IEEE Standard. The remainder functions are: + + float32_rem + float64_rem + floatx80_rem + float128_rem + +Each remainder function takes two operands. The operands and result are all +of the same type. Given operands x and y, the remainder functions return +the value x - n*y, where n is the integer closest to x/y. If x/y is exactly +halfway between two integers, n is the even integer closest to x/y. The +remainder functions are always exact and so require no rounding. 
+ +Depending on the relative magnitudes of the operands, the remainder +functions can take considerably longer to execute than the other SoftFloat +functions. This is inherent in the remainder operation itself and is not a +flaw in the SoftFloat implementation. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Round-to-Integer Functions + +For each format, SoftFloat implements the round-to-integer function +specified by the IEC/IEEE Standard. The functions are: + + float32_round_to_int + float64_round_to_int + floatx80_round_to_int + float128_round_to_int + +Each function takes a single floating-point operand and returns a result of +the same type. (Note that the result is not an integer type.) The operand +is rounded to an exact integer according to the current rounding mode, and +the resulting integer value is returned in the same floating-point format. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Comparison Functions + +The following floating-point comparison functions are provided: + + float32_eq float32_le float32_lt + float64_eq float64_le float64_lt + floatx80_eq floatx80_le floatx80_lt + float128_eq float128_le float128_lt + +Each function takes two operands of the same type and returns a 1 or 0 +representing either _true_ or _false_. The abbreviation `eq' stands for +``equal'' (=); `le' stands for ``less than or equal'' (<=); and `lt' stands +for ``less than'' (<). + +The standard greater-than (>), greater-than-or-equal (>=), and not-equal +(!=) functions are easily obtained using the functions provided. The +not-equal function is just the logical complement of the equal function. +The greater-than-or-equal function is identical to the less-than-or-equal +function with the operands reversed; and the greater-than function can be +obtained from the less-than function in the same way. 
+ +The IEC/IEEE Standard specifies that the less-than-or-equal and less-than +functions raise the invalid exception if either input is any kind of NaN. +The equal functions, on the other hand, are defined not to raise the invalid +exception on quiet NaNs. For completeness, SoftFloat provides the following +additional functions: + + float32_eq_signaling float32_le_quiet float32_lt_quiet + float64_eq_signaling float64_le_quiet float64_lt_quiet + floatx80_eq_signaling floatx80_le_quiet floatx80_lt_quiet + float128_eq_signaling float128_le_quiet float128_lt_quiet + +The `signaling' equal functions are identical to the standard functions +except that the invalid exception is raised for any NaN input. Likewise, +the `quiet' comparison functions are identical to their counterparts except +that the invalid exception is not raised for quiet NaNs. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Signaling NaN Test Functions + +The following functions test whether a floating-point value is a signaling +NaN: + + float32_is_signaling_nan + float64_is_signaling_nan + floatx80_is_signaling_nan + float128_is_signaling_nan + +The functions take one operand and return 1 if the operand is a signaling +NaN and 0 otherwise. + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Raise-Exception Function + +SoftFloat provides a function for raising floating-point exceptions: + + float_raise + +The function takes a mask indicating the set of exceptions to raise. No +result is returned. In addition to setting the specified exception flags, +this function may cause a trap or abort appropriate for the current system. 
+ +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + +------------------------------------------------------------------------------- +Contact Information + +At the time of this writing, the most up-to-date information about +SoftFloat and the latest release can be found at the Web page `http:// +HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'. + + diff --git a/StdLib/LibC/Softfloat/templates/milieu.h b/StdLib/LibC/Softfloat/templates/milieu.h new file mode 100644 index 0000000000..2fcfa1fa12 --- /dev/null +++ b/StdLib/LibC/Softfloat/templates/milieu.h @@ -0,0 +1,48 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
+ +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. NOTE(review): `!!!processor.h' looks like a template placeholder to be replaced with the target's processor header; confirm before using this file as-is. +------------------------------------------------------------------------------- +*/ +#include "../../../processors/!!!processor.h" + +/* +------------------------------------------------------------------------------- +Symbolic Boolean literals. NOTE(review): the commit message for this patch says the Arm milieu.h disables this definition; presumably FALSE/TRUE clash with existing definitions there, so check before instantiating the template unchanged. +------------------------------------------------------------------------------- +*/ +enum { + FALSE = 0, + TRUE = 1 +}; + diff --git a/StdLib/LibC/Softfloat/templates/softfloat-specialize b/StdLib/LibC/Softfloat/templates/softfloat-specialize new file mode 100644 index 0000000000..d8b2500f4a --- /dev/null +++ b/StdLib/LibC/Softfloat/templates/softfloat-specialize @@ -0,0 +1,464 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.
Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Underflow tininess-detection mode, statically initialized to default value. +(The declaration in `softfloat.h' must match the `int8' type here.) The default chosen here is `float_tininess_after_rounding'. +------------------------------------------------------------------------------- +*/ +int8 float_detect_tininess = float_tininess_after_rounding; + +/* +------------------------------------------------------------------------------- +Raises the exceptions specified by `flags'. Floating-point traps can be +defined here if desired. It is currently not possible for such a trap to +substitute a result value. If traps are not implemented, this routine +should be simply `float_exception_flags |= flags;'. This template takes that trap-free form: the routine only ORs `flags' into the global `float_exception_flags' and returns nothing. +------------------------------------------------------------------------------- +*/ +void float_raise( int8 flags ) +{ + + float_exception_flags |= flags; + +} + +/* +------------------------------------------------------------------------------- +Internal canonical NaN format. `sign' carries the NaN's sign bit; `high' and `low' carry its fraction bits as filled in by the `...ToCommonNaN' routines in this file (left-justified: e.g. `float64ToCommonNaN' stores `a<<12' in `high' and 0 in `low'). +------------------------------------------------------------------------------- +*/ +typedef struct { + flag sign; + bits64 high, low; +} commonNaNT; + +/* +------------------------------------------------------------------------------- +The pattern for a default generated single-precision NaN.
+------------------------------------------------------------------------------- +*/ +#define float32_default_nan 0xFFFFFFFF + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a NaN; +otherwise returns 0. Shifting `a' left by one discards the sign bit, so a result above 0xFF000000 means the exponent field is all ones and the fraction is nonzero. +------------------------------------------------------------------------------- +*/ +flag float32_is_nan( float32 a ) +{ + + return ( 0xFF000000 < (bits32) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. The test requires bits 30..23 (the exponent) to be all ones, bit 22 (the top fraction bit) to be clear, and the remaining 22 fraction bits to be nonzero. +------------------------------------------------------------------------------- +*/ +flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. The 41-bit left shift moves fraction bit 22 to bit 63 of `high' and pushes the sign and exponent bits out of the 64-bit value; `low' is always 0. +------------------------------------------------------------------------------- +*/ +static commonNaNT float32ToCommonNaN( float32 a ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>31; + z.low = 0; + z.high = ( (bits64) a )<<41; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the single- +precision floating-point format.
+------------------------------------------------------------------------------- +*/ +static float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); /* 0x7FC00000 = all-ones exponent plus fraction bit 22 */ + +} + +/* +------------------------------------------------------------------------------- +Takes two single-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. The operands are classified first, then fraction bit 22 is set in both. `a' is returned when it is a NaN, except that `b' is returned when `a' is signaling and `b' is also a NaN; when `a' is not a NaN, `b' is returned. +------------------------------------------------------------------------------- +*/ +static float32 propagateFloat32NaN( float32 a, float32 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +/* +------------------------------------------------------------------------------- +The pattern for a default generated double-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a NaN; +otherwise returns 0. Shifting `a' left by one discards the sign bit, so a result above 0xFFE0000000000000 means the exponent field is all ones and the fraction is nonzero. +------------------------------------------------------------------------------- +*/ +flag float64_is_nan( float64 a ) +{ + + return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a signaling +NaN; otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +flag float64_is_signaling_nan( float64 a ) +{ + + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) /* bits 62..52 all ones, bit 51 clear */ + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); /* remaining 51 fraction bits nonzero */ + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. The 12-bit left shift drops the sign and exponent bits so that fraction bit 51 lands in bit 63 of `high'; `low' is always 0. +------------------------------------------------------------------------------- +*/ +static commonNaNT float64ToCommonNaN( float64 a ) +{ + commonNaNT z; + + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>63; + z.low = 0; + z.high = a<<12; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the double- +precision floating-point format. The constant 0x7FF8000000000000 forces an all-ones exponent and sets fraction bit 51, so the result never satisfies `float64_is_signaling_nan'. +------------------------------------------------------------------------------- +*/ +static float64 commonNaNToFloat64( commonNaNT a ) +{ + + return + ( ( (bits64) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two double-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised.
+------------------------------------------------------------------------------- +*/ +static float64 propagateFloat64NaN( float64 a, float64 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float64_is_nan( a ); /* classify both operands before they are modified below */ + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= LIT64( 0x0008000000000000 ); /* set fraction bit 51 of `a' */ + b |= LIT64( 0x0008000000000000 ); /* likewise for `b' */ + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; /* `b' only when `a' was signaling and `b' is a NaN */ + } + else { + return b; + } + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated extended double-precision NaN. The +`high' and `low' values hold the most- and least-significant bits, +respectively. +------------------------------------------------------------------------------- +*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +NaN; otherwise returns 0. The low 15 bits of `high' must be all ones, and `low' must still be nonzero after its top bit (bit 63) is shifted out. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_nan( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +signaling NaN; otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +flag floatx80_is_signaling_nan( floatx80 a ) +{ + bits64 aLow; + + aLow = a.low & ~ LIT64( 0x4000000000000000 ); /* `a.low' with bit 62 cleared */ + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) /* bits 61..0 of `low' are nonzero */ + && ( a.low == aLow ); /* true only when bit 62 was already clear */ + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +invalid exception is raised. `a.low' is shifted left by one, so its bit 63 is dropped and its bit 62 becomes bit 63 of `high'; `low' of the result is always 0. +------------------------------------------------------------------------------- +*/ +static commonNaNT floatx80ToCommonNaN( floatx80 a ) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the extended +double-precision floating-point format. Bits 63 and 62 of the result's `low' are always set (0xC000000000000000) and the low 15 bits of `high' are all ones. +------------------------------------------------------------------------------- +*/ +static floatx80 commonNaNToFloatx80( commonNaNT a ) +{ + floatx80 z; + + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two extended double-precision floating-point values `a' and `b', one +of which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised.
+------------------------------------------------------------------------------- +*/ +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = floatx80_is_nan( a ); /* classify both operands before they are modified below */ + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); + a.low |= LIT64( 0xC000000000000000 ); /* set bits 63 and 62 of `a.low' */ + b.low |= LIT64( 0xC000000000000000 ); /* likewise for `b.low' */ + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; /* `b' only when `a' was signaling and `b' is a NaN */ + } + else { + return b; + } + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated quadruple-precision NaN. The `high' and +`low' values hold the most- and least-significant bits, respectively. +------------------------------------------------------------------------------- +*/ +#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) +#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a NaN; +otherwise returns 0. The first test checks that bits 62..48 of `high' are all ones (the sign bit is shifted out first); the second checks that the low 48 bits of `high' together with `low' are not all zero. +------------------------------------------------------------------------------- +*/ +flag float128_is_nan( float128 a ) +{ + + return + ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a +signaling NaN; otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +flag float128_is_signaling_nan( float128 a ) +{ + + return + ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) /* bits 62..48 of `high' all ones, bit 47 clear */ + && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); /* some lower fraction bit is set */ + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. NOTE(review): `shortShift128Left' is defined outside this fragment; presumably it shifts the 128-bit pair left by 16 so the sign and exponent bits are dropped -- confirm against softfloat-macros. +------------------------------------------------------------------------------- +*/ +static commonNaNT float128ToCommonNaN( float128 a ) +{ + commonNaNT z; + + if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>63; + shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the quadruple- +precision floating-point format. After the shift, the constant 0x7FFF800000000000 sets bits 62..48 of `high' to all ones and also sets bit 47, so the result never satisfies `float128_is_signaling_nan'. +------------------------------------------------------------------------------- +*/ +static float128 commonNaNToFloat128( commonNaNT a ) +{ + float128 z; + + shift128Right( a.high, a.low, 16, &z.high, &z.low ); + z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two quadruple-precision floating-point values `a' and `b', one of +which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised.
+------------------------------------------------------------------------------- +*/ +static float128 propagateFloat128NaN( float128 a, float128 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float128_is_nan( a ); /* classify both operands before they are modified below */ + aIsSignalingNaN = float128_is_signaling_nan( a ); + bIsNaN = float128_is_nan( b ); + bIsSignalingNaN = float128_is_signaling_nan( b ); + a.high |= LIT64( 0x0000800000000000 ); /* set bit 47 of `a.high' */ + b.high |= LIT64( 0x0000800000000000 ); /* likewise for `b.high' */ + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; /* `b' only when `a' was signaling and `b' is a NaN */ + } + else { + return b; + } + +} + +#endif + diff --git a/StdLib/LibC/Softfloat/templates/softfloat.h b/StdLib/LibC/Softfloat/templates/softfloat.h new file mode 100644 index 0000000000..8c0fe10134 --- /dev/null +++ b/StdLib/LibC/Softfloat/templates/softfloat.h @@ -0,0 +1,290 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR.
USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. The same applies to +the `FLOAT128' macro and the quadruple-precision format `float128'. This template defines both macros. +------------------------------------------------------------------------------- +*/ +#define FLOATX80 +#define FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. The 32- and 64-bit formats are single integers; `floatx80' and `float128' are structs split into `high' and `low' parts. NOTE(review): the `!!!' prefixes look like template placeholders for the port's own integer type names; confirm before use. +------------------------------------------------------------------------------- +*/ +typedef !!!bits32 float32; +typedef !!!bits64 float64; +#ifdef FLOATX80 +typedef struct { + !!!bits16 high; + !!!bits64 low; +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { + !!!bits64 high, low; +} float128; +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode.
+------------------------------------------------------------------------------- +*/ +extern !!!int8 float_detect_tininess; +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. The four mode values below are the consecutive integers 0 through 3, not bit flags. +------------------------------------------------------------------------------- +*/ +extern !!!int8 float_rounding_mode; +enum { + float_round_nearest_even = 0, + float_round_to_zero = 1, + float_round_down = 2, + float_round_up = 3 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. Each flag below is a distinct power of two, so several can be ORed together in one value. +------------------------------------------------------------------------------- +*/ +extern !!!int8 float_exception_flags; +enum { + float_flag_inexact = 1, + float_flag_underflow = 2, + float_flag_overflow = 4, + float_flag_divbyzero = 8, + float_flag_invalid = 16 +}; + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. +------------------------------------------------------------------------------- +*/ +void float_raise( !!!int8 ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines.
+------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( !!!int32 ); +float64 int32_to_float64( !!!int32 ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( !!!int32 ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( !!!int32 ); +#endif +float32 int64_to_float32( !!!int64 ); +float64 int64_to_float64( !!!int64 ); +#ifdef FLOATX80 +floatx80 int64_to_floatx80( !!!int64 ); +#endif +#ifdef FLOAT128 +float128 int64_to_float128( !!!int64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. +------------------------------------------------------------------------------- +*/ +!!!int32 float32_to_int32( float32 ); +!!!int32 float32_to_int32_round_to_zero( float32 ); +!!!int64 float32_to_int64( float32 ); +!!!int64 float32_to_int64_round_to_zero( float32 ); +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. 
+------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +!!!flag float32_eq( float32, float32 ); +!!!flag float32_le( float32, float32 ); +!!!flag float32_lt( float32, float32 ); +!!!flag float32_eq_signaling( float32, float32 ); +!!!flag float32_le_quiet( float32, float32 ); +!!!flag float32_lt_quiet( float32, float32 ); +!!!flag float32_is_signaling_nan( float32 ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +!!!int32 float64_to_int32( float64 ); +!!!int32 float64_to_int32_round_to_zero( float64 ); +!!!int64 float64_to_int64( float64 ); +!!!int64 float64_to_int64_round_to_zero( float64 ); +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. 
+------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +!!!flag float64_eq( float64, float64 ); +!!!flag float64_le( float64, float64 ); +!!!flag float64_lt( float64, float64 ); +!!!flag float64_eq_signaling( float64, float64 ); +!!!flag float64_le_quiet( float64, float64 ); +!!!flag float64_lt_quiet( float64, float64 ); +!!!flag float64_is_signaling_nan( float64 ); + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +!!!int32 floatx80_to_int32( floatx80 ); +!!!int32 floatx80_to_int32_round_to_zero( floatx80 ); +!!!int64 floatx80_to_int64( floatx80 ); +!!!int64 floatx80_to_int64_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern !!!int8 floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. 
+------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +!!!flag floatx80_eq( floatx80, floatx80 ); +!!!flag floatx80_le( floatx80, floatx80 ); +!!!flag floatx80_lt( floatx80, floatx80 ); +!!!flag floatx80_eq_signaling( floatx80, floatx80 ); +!!!flag floatx80_le_quiet( floatx80, floatx80 ); +!!!flag floatx80_lt_quiet( floatx80, floatx80 ); +!!!flag floatx80_is_signaling_nan( floatx80 ); + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision conversion routines. +------------------------------------------------------------------------------- +*/ +!!!int32 float128_to_int32( float128 ); +!!!int32 float128_to_int32_round_to_zero( float128 ); +!!!int64 float128_to_int64( float128 ); +!!!int64 float128_to_int64_round_to_zero( float128 ); +float32 float128_to_float32( float128 ); +float64 float128_to_float64( float128 ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision operations. 
+------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 ); +float128 float128_add( float128, float128 ); +float128 float128_sub( float128, float128 ); +float128 float128_mul( float128, float128 ); +float128 float128_div( float128, float128 ); +float128 float128_rem( float128, float128 ); +float128 float128_sqrt( float128 ); +!!!flag float128_eq( float128, float128 ); +!!!flag float128_le( float128, float128 ); +!!!flag float128_lt( float128, float128 ); +!!!flag float128_eq_signaling( float128, float128 ); +!!!flag float128_le_quiet( float128, float128 ); +!!!flag float128_lt_quiet( float128, float128 ); +!!!flag float128_is_signaling_nan( float128 ); + +#endif + diff --git a/StdLib/LibC/Softfloat/timesoftfloat.c b/StdLib/LibC/Softfloat/timesoftfloat.c new file mode 100644 index 0000000000..c6eabc6ac1 --- /dev/null +++ b/StdLib/LibC/Softfloat/timesoftfloat.c @@ -0,0 +1,2641 @@ +/* $NetBSD: timesoftfloat.c,v 1.1 2000/06/06 08:15:11 bjh21 Exp $ */ + +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: timesoftfloat.c,v 1.1 2000/06/06 08:15:11 bjh21 Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <stdio.h> +#include <time.h> +#include "milieu.h" +#include "softfloat.h" + +enum { + minIterations = 1000 +}; + +static void fail( const char *message, ...
) +{ + va_list varArgs; + + fputs( "timesoftfloat: ", stderr ); + va_start( varArgs, message ); + vfprintf( stderr, message, varArgs ); + va_end( varArgs ); + fputs( ".\n", stderr ); + exit( EXIT_FAILURE ); + +} + +static char *functionName; +static char *roundingPrecisionName, *roundingModeName, *tininessModeName; + +static void reportTime( int32 count, long clocks ) +{ + + printf( + "%8.1f kops/s: %s", + ( count / ( ( (float) clocks ) / CLOCKS_PER_SEC ) ) / 1000, + functionName + ); + if ( roundingModeName ) { + if ( roundingPrecisionName ) { + fputs( ", precision ", stdout ); + fputs( roundingPrecisionName, stdout ); + } + fputs( ", rounding ", stdout ); + fputs( roundingModeName, stdout ); + if ( tininessModeName ) { + fputs( ", tininess ", stdout ); + fputs( tininessModeName, stdout ); + fputs( " rounding", stdout ); + } + } + fputc( '\n', stdout ); + +} + +enum { + numInputs_int32 = 32 +}; + +static const int32 inputs_int32[ numInputs_int32 ] = { + 0xFFFFBB79, 0x405CF80F, 0x00000000, 0xFFFFFD04, + 0xFFF20002, 0x0C8EF795, 0xF00011FF, 0x000006CA, + 0x00009BFE, 0xFF4862E3, 0x9FFFEFFE, 0xFFFFFFB7, + 0x0BFF7FFF, 0x0000F37A, 0x0011DFFE, 0x00000006, + 0xFFF02006, 0xFFFFF7D1, 0x10200003, 0xDE8DF765, + 0x00003E02, 0x000019E8, 0x0008FFFE, 0xFFFFFB5C, + 0xFFDF7FFE, 0x07C42FBF, 0x0FFFE3FF, 0x040B9F13, + 0xBFFFFFF8, 0x0001BF56, 0x000017F6, 0x000A908A +}; + +static void time_a_int32_z_float32( float32 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( 
count, endClock - startClock ); + +} + +static void time_a_int32_z_float64( float64 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_int32_z_floatx80( floatx80 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + +static void time_a_int32_z_float128( float128 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } 
+ endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +enum { + numInputs_int64 = 32 +}; + +static const int64 inputs_int64[ numInputs_int64 ] = { + LIT64( 0xFBFFC3FFFFFFFFFF ), + LIT64( 0x0000000003C589BC ), + LIT64( 0x00000000400013FE ), + LIT64( 0x0000000000186171 ), + LIT64( 0xFFFFFFFFFFFEFBFA ), + LIT64( 0xFFFFFD79E6DFFC73 ), + LIT64( 0x0000000010001DFF ), + LIT64( 0xDD1A0F0C78513710 ), + LIT64( 0xFFFF83FFFFFEFFFE ), + LIT64( 0x00756EBD1AD0C1C7 ), + LIT64( 0x0003FDFFFFFFFFBE ), + LIT64( 0x0007D0FB2C2CA951 ), + LIT64( 0x0007FC0007FFFFFE ), + LIT64( 0x0000001F942B18BB ), + LIT64( 0x0000080101FFFFFE ), + LIT64( 0xFFFFFFFFFFFF0978 ), + LIT64( 0x000000000008BFFF ), + LIT64( 0x0000000006F5AF08 ), + LIT64( 0xFFDEFF7FFFFFFFFE ), + LIT64( 0x0000000000000003 ), + LIT64( 0x3FFFFFFFFF80007D ), + LIT64( 0x0000000000000078 ), + LIT64( 0xFFF80000007FDFFD ), + LIT64( 0x1BBC775B78016AB0 ), + LIT64( 0xFFF9001FFFFFFFFE ), + LIT64( 0xFFFD4767AB98E43F ), + LIT64( 0xFFFFFEFFFE00001E ), + LIT64( 0xFFFFFFFFFFF04EFD ), + LIT64( 0x07FFFFFFFFFFF7FF ), + LIT64( 0xFFFC9EAA38F89050 ), + LIT64( 0x00000020FBFFFFFE ), + LIT64( 0x0000099AE6455357 ) +}; + +static void time_a_int64_z_float32( float32 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_int64_z_float64( float64 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 
0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + /* Benchmarks an int64 -> floatx80 routine (only built when FLOATX80 is defined). Calibration: calls `function` on successive inputs_int64 entries (index wrapped with & ( numInputs_int64 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_int64_z_floatx80( floatx80 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + /* Same calibrate-then-measure benchmark as above, for an int64 -> float128 routine (only built when FLOAT128 is defined). */ +static void time_a_int64_z_float128( float128 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + /* Number of float32 test operands. 32 is a power of two, so the timing loops can wrap their index with & ( numInputs_float32 - 1 ). */ +enum { + numInputs_float32 = 32 +}; + /* Fixed float32 operands, written as raw 32-bit hexadecimal values, cycled through by the float32 timing routines. */ +static const float32 inputs_float32[ numInputs_float32 ] = { + 0x4EFA0000,
0xC1D0B328, 0x80000000, 0x3E69A31E, + 0xAF803EFF, 0x3F800000, 0x17BF8000, 0xE74A301A, + 0x4E010003, 0x7EE3C75D, 0xBD803FE0, 0xBFFEFF00, + 0x7981F800, 0x431FFFFC, 0xC100C000, 0x3D87EFFF, + 0x4103FEFE, 0xBC000007, 0xBF01F7FF, 0x4E6C6B5C, + 0xC187FFFE, 0xC58B9F13, 0x4F88007F, 0xDF004007, + 0xB7FFD7FE, 0x7E8001FB, 0x46EFFBFF, 0x31C10000, + 0xDB428661, 0x33F89B1F, 0xA3BFEFFF, 0x537BFFBE +}; + /* Benchmarks a float32 -> int32 routine. Calibration: calls `function` on successive inputs_float32 entries (index wrapped with & ( numInputs_float32 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_float32_z_int32( int32 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a float32 -> int64 routine. */ +static void time_a_float32_z_int64( int64 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a float32 -> float64 routine. */ +static void time_a_float32_z_float64( float64 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[
inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + /* Benchmarks a float32 -> floatx80 routine (only built when FLOATX80 is defined). Calibration: calls `function` on successive inputs_float32 entries (index wrapped with & ( numInputs_float32 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_float32_z_floatx80( floatx80 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + /* Same calibrate-then-measure benchmark as above, for a float32 -> float128 routine (only built when FLOAT128 is defined). */ +static void time_a_float32_z_float128( float128 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + /* Same calibrate-then-measure benchmark as above, for a one-operand float32 -> float32 routine. */ +static void time_az_float32( float32 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock();
+ do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Benchmarks a two-operand float32 routine returning flag. Calibration: calls `function` in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts both indices at 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Index A advances by one per call; index B also advances by one per call and takes one extra step whenever A wraps to 0. Both wrap with & ( numInputs_float32 - 1 ). Results of `function` are discarded. */ +static void time_ab_float32_z_flag( flag function( float32, float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same two-operand benchmark and index stepping as above, for a float32 routine returning float32. */ +static void time_abz_float32( float32 function( float32, float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + count +=
minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Variant of the float32 operand table: compared entry by entry with inputs_float32, each value is the same bit pattern with the most significant (sign) bit cleared. */ +static const float32 inputs_float32_pos[ numInputs_float32 ] = { + 0x4EFA0000, 0x41D0B328, 0x00000000, 0x3E69A31E, + 0x2F803EFF, 0x3F800000, 0x17BF8000, 0x674A301A, + 0x4E010003, 0x7EE3C75D, 0x3D803FE0, 0x3FFEFF00, + 0x7981F800, 0x431FFFFC, 0x4100C000, 0x3D87EFFF, + 0x4103FEFE, 0x3C000007, 0x3F01F7FF, 0x4E6C6B5C, + 0x4187FFFE, 0x458B9F13, 0x4F88007F, 0x5F004007, + 0x37FFD7FE, 0x7E8001FB, 0x46EFFBFF, 0x31C10000, + 0x5B428661, 0x33F89B1F, 0x23BFEFFF, 0x537BFFBE +}; + /* Benchmarks a one-operand float32 -> float32 routine on inputs_float32_pos. Calibration: calls `function` on successive entries (index wrapped with & ( numInputs_float32 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_az_float32_pos( float32 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Number of float64 test operands. 32 is a power of two, so the timing loops can wrap their index with & ( numInputs_float64 - 1 ). */ +enum { + numInputs_float64 = 32 +}; + /* Fixed float64 operands, written as raw 64-bit hexadecimal values via LIT64, cycled through by the float64 timing routines. */ +static const float64 inputs_float64[ numInputs_float64 ] = { + LIT64( 0x422FFFC008000000 ), + LIT64( 0xB7E0000480000000 ), + LIT64( 0xF3FD2546120B7935 ), + LIT64( 0x3FF0000000000000 ), + LIT64( 0xCE07F766F09588D6 ), + LIT64( 0x8000000000000000 ), + LIT64( 0x3FCE000400000000 ), + LIT64( 0x8313B60F0032BED8 ), + LIT64( 0xC1EFFFFFC0002000 ), +
LIT64( 0x3FB3C75D224F2B0F ), + LIT64( 0x7FD00000004000FF ), + LIT64( 0xA12FFF8000001FFF ), + LIT64( 0x3EE0000000FE0000 ), + LIT64( 0x0010000080000004 ), + LIT64( 0x41CFFFFE00000020 ), + LIT64( 0x40303FFFFFFFFFFD ), + LIT64( 0x3FD000003FEFFFFF ), + LIT64( 0xBFD0000010000000 ), + LIT64( 0xB7FC6B5C16CA55CF ), + LIT64( 0x413EEB940B9D1301 ), + LIT64( 0xC7E00200001FFFFF ), + LIT64( 0x47F00021FFFFFFFE ), + LIT64( 0xBFFFFFFFF80000FF ), + LIT64( 0xC07FFFFFE00FFFFF ), + LIT64( 0x001497A63740C5E8 ), + LIT64( 0xC4BFFFE0001FFFFF ), + LIT64( 0x96FFDFFEFFFFFFFF ), + LIT64( 0x403FC000000001FE ), + LIT64( 0xFFD00000000001F6 ), + LIT64( 0x0640400002000000 ), + LIT64( 0x479CEE1E4F789FE0 ), + LIT64( 0xC237FFFFFFFFFDFE ) +}; + /* Benchmarks a float64 -> int32 routine. Calibration: calls `function` on successive inputs_float64 entries (index wrapped with & ( numInputs_float64 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_float64_z_int32( int32 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a float64 -> int64 routine. */ +static void time_a_float64_z_int64( int64 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock =
clock(); + reportTime( count, endClock - startClock ); + +} + /* Benchmarks a float64 -> float32 routine. Calibration: calls `function` on successive inputs_float64 entries (index wrapped with & ( numInputs_float64 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_float64_z_float32( float32 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + /* Same calibrate-then-measure benchmark as above, for a float64 -> floatx80 routine (only built when FLOATX80 is defined). */ +static void time_a_float64_z_floatx80( floatx80 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + /* Same calibrate-then-measure benchmark as above, for a float64 -> float128 routine (only built when FLOAT128 is defined). */ +static void time_a_float64_z_float128( float128 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); +
inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + /* Benchmarks a one-operand float64 -> float64 routine. Calibration: calls `function` on successive inputs_float64 entries (index wrapped with & ( numInputs_float64 - 1 )) in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_az_float64( float64 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Two-operand version of the benchmark above, for a float64 routine returning flag. Index A advances by one per call; index B also advances by one per call and takes one extra step whenever A wraps to 0. Both wrap with & ( numInputs_float64 - 1 ), and both restart at 0 for the measured run. */ +static void time_ab_float64_z_flag( flag function( float64, float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same two-operand benchmark and index stepping as above, for a float64 routine returning float64. */ +static void time_abz_float64( float64 function( float64, float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0;
+ startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Variant of the float64 operand table: compared entry by entry with inputs_float64, each value is the same bit pattern with the most significant (sign) bit cleared. */ +static const float64 inputs_float64_pos[ numInputs_float64 ] = { + LIT64( 0x422FFFC008000000 ), + LIT64( 0x37E0000480000000 ), + LIT64( 0x73FD2546120B7935 ), + LIT64( 0x3FF0000000000000 ), + LIT64( 0x4E07F766F09588D6 ), + LIT64( 0x0000000000000000 ), + LIT64( 0x3FCE000400000000 ), + LIT64( 0x0313B60F0032BED8 ), + LIT64( 0x41EFFFFFC0002000 ), + LIT64( 0x3FB3C75D224F2B0F ), + LIT64( 0x7FD00000004000FF ), + LIT64( 0x212FFF8000001FFF ), + LIT64( 0x3EE0000000FE0000 ), + LIT64( 0x0010000080000004 ), + LIT64( 0x41CFFFFE00000020 ), + LIT64( 0x40303FFFFFFFFFFD ), + LIT64( 0x3FD000003FEFFFFF ), + LIT64( 0x3FD0000010000000 ), + LIT64( 0x37FC6B5C16CA55CF ), + LIT64( 0x413EEB940B9D1301 ), + LIT64( 0x47E00200001FFFFF ), + LIT64( 0x47F00021FFFFFFFE ), + LIT64( 0x3FFFFFFFF80000FF ), + LIT64( 0x407FFFFFE00FFFFF ), + LIT64( 0x001497A63740C5E8 ), + LIT64( 0x44BFFFE0001FFFFF ), + LIT64( 0x16FFDFFEFFFFFFFF ), + LIT64( 0x403FC000000001FE ), + LIT64( 0x7FD00000000001F6 ), + LIT64( 0x0640400002000000 ), + LIT64( 0x479CEE1E4F789FE0 ), + LIT64( 0x4237FFFFFFFFFDFE ) +}; + /* Benchmarks a one-operand float64 -> float64 routine on inputs_float64_pos: batches of minIterations calls run until at least CLOCKS_PER_SEC clock() ticks have elapsed to choose count, then exactly count calls are re-timed from entry 0 and count plus the elapsed ticks go to reportTime. The index wraps with & ( numInputs_float64 - 1 ); results of `function` are discarded. */ +static void time_az_float64_pos( float64 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; +
inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + /* Number of floatx80 test operands (only defined when FLOATX80 is defined). 32 is a power of two, so the timing loops can wrap their index with & ( numInputs_floatx80 - 1 ). */ +enum { + numInputs_floatx80 = 32 +}; + /* Fixed floatx80 operands stored as { bits16 high, bits64 low } pairs; the floatx80 timing routines copy these two fields into a floatx80 before each call. */ +static const struct { + bits16 high; + bits64 low; +} inputs_floatx80[ numInputs_floatx80 ] = { + { 0xC03F, LIT64( 0xA9BE15A19C1E8B62 ) }, + { 0x8000, LIT64( 0x0000000000000000 ) }, + { 0x75A8, LIT64( 0xE59591E4788957A5 ) }, + { 0xBFFF, LIT64( 0xFFF0000000000040 ) }, + { 0x0CD8, LIT64( 0xFC000000000007FE ) }, + { 0x43BA, LIT64( 0x99A4000000000000 ) }, + { 0x3FFF, LIT64( 0x8000000000000000 ) }, + { 0x4081, LIT64( 0x94FBF1BCEB5545F0 ) }, + { 0x403E, LIT64( 0xFFF0000000002000 ) }, + { 0x3FFE, LIT64( 0xC860E3C75D224F28 ) }, + { 0x407E, LIT64( 0xFC00000FFFFFFFFE ) }, + { 0x737A, LIT64( 0x800000007FFDFFFE ) }, + { 0x4044, LIT64( 0xFFFFFF80000FFFFF ) }, + { 0xBBFE, LIT64( 0x8000040000001FFE ) }, + { 0xC002, LIT64( 0xFF80000000000020 ) }, + { 0xDE8D, LIT64( 0xFFFFFFFFFFE00004 ) }, + { 0xC004, LIT64( 0x8000000000003FFB ) }, + { 0x407F, LIT64( 0x800000000003FFFE ) }, + { 0xC000, LIT64( 0xA459EE6A5C16CA55 ) }, + { 0x8003, LIT64( 0xC42CBF7399AEEB94 ) }, + { 0xBF7F, LIT64( 0xF800000000000006 ) }, + { 0xC07F, LIT64( 0xBF56BE8871F28FEA ) }, + { 0xC07E, LIT64( 0xFFFF77FFFFFFFFFE ) }, + { 0xADC9, LIT64( 0x8000000FFFFFFFDE ) }, + { 0xC001, LIT64( 0xEFF7FFFFFFFFFFFF ) }, + { 0x4001, LIT64( 0xBE84F30125C497A6 ) }, + { 0xC06B, LIT64( 0xEFFFFFFFFFFFFFFF ) }, + { 0x4080, LIT64( 0xFFFFFFFFBFFFFFFF ) }, + { 0x87E9, LIT64( 0x81FFFFFFFFFFFBFF ) }, + { 0xA63F, LIT64( 0x801FFFFFFEFFFFFE ) }, + { 0x403C, LIT64(
0x801FFFFFFFF7FFFF ) }, + { 0x4018, LIT64( 0x8000000000080003 ) } +}; + /* Benchmarks a floatx80 -> int32 routine. Each operand is copied field by field (low, high) from inputs_floatx80 into the local `a` before the call. Calibration: calls `function` in batches of minIterations (index wrapped with & ( numInputs_floatx80 - 1 )) until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_floatx80_z_int32( int32 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a floatx80 -> int64 routine. */ +static void time_a_floatx80_z_int64( int64 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a floatx80 -> float32 routine. */ +static void time_a_floatx80_z_float32( float32 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; +
a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Benchmarks a floatx80 -> float64 routine. Each operand is copied field by field (low, high) from inputs_floatx80 into the local `a` before the call. Calibration: calls `function` in batches of minIterations (index wrapped with & ( numInputs_floatx80 - 1 )) until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_floatx80_z_float64( float64 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOAT128 + /* Same calibrate-then-measure benchmark as above, for a floatx80 -> float128 routine (only built when FLOAT128 is defined). */ +static void time_a_floatx80_z_float128( float128 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[
inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + /* Benchmarks a one-operand floatx80 -> floatx80 routine. Each operand is copied field by field (low, high) from inputs_floatx80 into the local `a` before the call. Calibration: calls `function` in batches of minIterations (index wrapped with & ( numInputs_floatx80 - 1 )) until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_az_floatx80( floatx80 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Two-operand version of the benchmark above, for a floatx80 routine returning flag; operands are copied into locals `a` and `b`. Index A advances by one per call; index B also advances by one per call and takes one extra step whenever A wraps to 0. Both wrap with & ( numInputs_floatx80 - 1 ), and both restart at 0 for the measured run. */ +static void time_ab_floatx80_z_flag( flag function( floatx80, floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + floatx80 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; +
function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Benchmarks a two-operand floatx80 routine returning floatx80. Operands are copied field by field (low, high) from inputs_floatx80 into locals `a` and `b` before each call. Calibration: calls `function` in batches of minIterations until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts both indices at 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Index A advances by one per call; index B also advances by one per call and takes one extra step whenever A wraps to 0. Both wrap with & ( numInputs_floatx80 - 1 ). */ +static void time_abz_floatx80( floatx80 function( floatx80, floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + floatx80 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Variant of the floatx80 operand table: compared entry by entry with inputs_floatx80, each `high` word has its most significant (sign) bit cleared and each `low` word is unchanged. */ +static const struct { + bits16 high; + bits64 low; +} inputs_floatx80_pos[ numInputs_floatx80 ] = { + { 0x403F, LIT64( 0xA9BE15A19C1E8B62 ) }, + { 0x0000, LIT64( 0x0000000000000000 ) }, + { 0x75A8, LIT64( 0xE59591E4788957A5 ) }, + { 0x3FFF, LIT64( 0xFFF0000000000040 ) }, + { 0x0CD8, LIT64( 0xFC000000000007FE ) }, + { 0x43BA, LIT64( 0x99A4000000000000 ) }, + { 0x3FFF, LIT64( 0x8000000000000000 ) }, + { 0x4081, LIT64( 0x94FBF1BCEB5545F0 ) }, + { 0x403E, LIT64(
0xFFF0000000002000 ) }, + { 0x3FFE, LIT64( 0xC860E3C75D224F28 ) }, + { 0x407E, LIT64( 0xFC00000FFFFFFFFE ) }, + { 0x737A, LIT64( 0x800000007FFDFFFE ) }, + { 0x4044, LIT64( 0xFFFFFF80000FFFFF ) }, + { 0x3BFE, LIT64( 0x8000040000001FFE ) }, + { 0x4002, LIT64( 0xFF80000000000020 ) }, + { 0x5E8D, LIT64( 0xFFFFFFFFFFE00004 ) }, + { 0x4004, LIT64( 0x8000000000003FFB ) }, + { 0x407F, LIT64( 0x800000000003FFFE ) }, + { 0x4000, LIT64( 0xA459EE6A5C16CA55 ) }, + { 0x0003, LIT64( 0xC42CBF7399AEEB94 ) }, + { 0x3F7F, LIT64( 0xF800000000000006 ) }, + { 0x407F, LIT64( 0xBF56BE8871F28FEA ) }, + { 0x407E, LIT64( 0xFFFF77FFFFFFFFFE ) }, + { 0x2DC9, LIT64( 0x8000000FFFFFFFDE ) }, + { 0x4001, LIT64( 0xEFF7FFFFFFFFFFFF ) }, + { 0x4001, LIT64( 0xBE84F30125C497A6 ) }, + { 0x406B, LIT64( 0xEFFFFFFFFFFFFFFF ) }, + { 0x4080, LIT64( 0xFFFFFFFFBFFFFFFF ) }, + { 0x07E9, LIT64( 0x81FFFFFFFFFFFBFF ) }, + { 0x263F, LIT64( 0x801FFFFFFEFFFFFE ) }, + { 0x403C, LIT64( 0x801FFFFFFFF7FFFF ) }, + { 0x4018, LIT64( 0x8000000000080003 ) } +}; + /* Benchmarks a one-operand floatx80 -> floatx80 routine on inputs_floatx80_pos. Each operand is copied field by field (low, high) into the local `a` before the call. Calibration: calls `function` in batches of minIterations (index wrapped with & ( numInputs_floatx80 - 1 )) until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_az_floatx80_pos( floatx80 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80_pos[ inputNum ].low; + a.high = inputs_floatx80_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80_pos[ inputNum ].low; + a.high = inputs_floatx80_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + /* Number of float128 test operands (only defined when FLOAT128 is defined). 32 is a power of two, so the timing loops can wrap their index with & ( numInputs_float128 - 1 ). */ +enum { + numInputs_float128 = 32 +}; + /* Fixed float128 operands stored as { bits64 high, bits64 low } pairs; the float128 timing routines copy these two fields into a float128 before each call. */ +static const struct { + bits64 high, low; +} inputs_float128[ numInputs_float128 ]
= { /* Each initializer below is { high, low }, matching the member order of the enclosing struct. */ + { LIT64( 0x3FDA200000100000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3FFF000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x85F14776190C8306 ), LIT64( 0xD8715F4E3D54BB92 ) }, + { LIT64( 0xF2B00000007FFFFF ), LIT64( 0xFFFFFFFFFFF7FFFF ) }, + { LIT64( 0x8000000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0xBFFFFFFFFFE00000 ), LIT64( 0x0000008000000000 ) }, + { LIT64( 0x407F1719CE722F3E ), LIT64( 0xDA6B3FE5FF29425B ) }, + { LIT64( 0x43FFFF8000000000 ), LIT64( 0x0000000000400000 ) }, + { LIT64( 0x401E000000000100 ), LIT64( 0x0000000000002000 ) }, + { LIT64( 0x3FFED71DACDA8E47 ), LIT64( 0x4860E3C75D224F28 ) }, + { LIT64( 0xBF7ECFC1E90647D1 ), LIT64( 0x7A124FE55623EE44 ) }, + { LIT64( 0x0DF7007FFFFFFFFF ), LIT64( 0xFFFFFFFFEFFFFFFF ) }, + { LIT64( 0x3FE5FFEFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFEFFF ) }, + { LIT64( 0x403FFFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFBFE ) }, + { LIT64( 0xBFFB2FBF7399AFEB ), LIT64( 0xA459EE6A5C16CA55 ) }, + { LIT64( 0xBDB8FFFFFFFFFFFC ), LIT64( 0x0000000000000400 ) }, + { LIT64( 0x3FC8FFDFFFFFFFFF ), LIT64( 0xFFFFFFFFF0000000 ) }, + { LIT64( 0x3FFBFFFFFFDFFFFF ), LIT64( 0xFFF8000000000000 ) }, + { LIT64( 0x407043C11737BE84 ), LIT64( 0xDDD58212ADC937F4 ) }, + { LIT64( 0x8001000000000000 ), LIT64( 0x0000001000000001 ) }, + { LIT64( 0xC036FFFFFFFFFFFF ), LIT64( 0xFE40000000000000 ) }, + { LIT64( 0x4002FFFFFE000002 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x4000C3FEDE897773 ), LIT64( 0x326AC4FD8EFBE6DC ) }, + { LIT64( 0xBFFF0000000FFFFF ), LIT64( 0xFFFFFE0000000000 ) }, + { LIT64( 0x62C3E502146E426D ), LIT64( 0x43F3CAA0DC7DF1A0 ) }, + { LIT64( 0xB5CBD32E52BB570E ), LIT64( 0xBCC477CB11C6236C ) }, + { LIT64( 0xE228FFFFFFC00000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3F80000000000000 ), LIT64( 0x0000000080000008 ) }, + { LIT64( 0xC1AFFFDFFFFFFFFF ), LIT64( 0xFFFC000000000000 ) }, + { LIT64( 0xC96F000000000000 ), LIT64( 0x00000001FFFBFFFF ) }, + { LIT64( 0x3DE09BFE7923A338 ), LIT64( 0xBCC8FBBD7CEC1F4F ) }, + { LIT64(
0x401CFFFFFFFFFFFF ), LIT64( 0xFFFFFFFEFFFFFF80 ) } +}; + /* Benchmarks a float128 -> int32 routine. Each operand is copied field by field (low, high) from inputs_float128 into the local `a` before the call. Calibration: calls `function` in batches of minIterations (index wrapped with & ( numInputs_float128 - 1 )) until at least CLOCKS_PER_SEC clock() ticks have elapsed, accumulating the call total in count. Measurement: restarts at entry 0, makes exactly count calls, and passes count plus the elapsed ticks to reportTime. Results of `function` are discarded. */ +static void time_a_float128_z_int32( int32 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a float128 -> int64 routine. */ +static void time_a_float128_z_int64( int64 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + /* Same calibrate-then-measure benchmark as above, for a float128 -> float32 routine. */ +static void time_a_float128_z_float32( float32 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high =
inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float128_z_float64( float64 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_float128_z_floatx80( floatx80 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum 
].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +static void time_az_float128( float128 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_ab_float128_z_flag( flag function( float128, float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + float128 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( 
a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_abz_float128( float128 function( float128, float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + float128 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static const struct { + bits64 high, low; +} inputs_float128_pos[ numInputs_float128 ] = { + { LIT64( 0x3FDA200000100000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3FFF000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x05F14776190C8306 ), LIT64( 0xD8715F4E3D54BB92 ) }, + { LIT64( 0x72B00000007FFFFF ), LIT64( 0xFFFFFFFFFFF7FFFF ) }, + { LIT64( 0x0000000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3FFFFFFFFFE00000 ), LIT64( 0x0000008000000000 ) }, + { LIT64( 
0x407F1719CE722F3E ), LIT64( 0xDA6B3FE5FF29425B ) }, + { LIT64( 0x43FFFF8000000000 ), LIT64( 0x0000000000400000 ) }, + { LIT64( 0x401E000000000100 ), LIT64( 0x0000000000002000 ) }, + { LIT64( 0x3FFED71DACDA8E47 ), LIT64( 0x4860E3C75D224F28 ) }, + { LIT64( 0x3F7ECFC1E90647D1 ), LIT64( 0x7A124FE55623EE44 ) }, + { LIT64( 0x0DF7007FFFFFFFFF ), LIT64( 0xFFFFFFFFEFFFFFFF ) }, + { LIT64( 0x3FE5FFEFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFEFFF ) }, + { LIT64( 0x403FFFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFBFE ) }, + { LIT64( 0x3FFB2FBF7399AFEB ), LIT64( 0xA459EE6A5C16CA55 ) }, + { LIT64( 0x3DB8FFFFFFFFFFFC ), LIT64( 0x0000000000000400 ) }, + { LIT64( 0x3FC8FFDFFFFFFFFF ), LIT64( 0xFFFFFFFFF0000000 ) }, + { LIT64( 0x3FFBFFFFFFDFFFFF ), LIT64( 0xFFF8000000000000 ) }, + { LIT64( 0x407043C11737BE84 ), LIT64( 0xDDD58212ADC937F4 ) }, + { LIT64( 0x0001000000000000 ), LIT64( 0x0000001000000001 ) }, + { LIT64( 0x4036FFFFFFFFFFFF ), LIT64( 0xFE40000000000000 ) }, + { LIT64( 0x4002FFFFFE000002 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x4000C3FEDE897773 ), LIT64( 0x326AC4FD8EFBE6DC ) }, + { LIT64( 0x3FFF0000000FFFFF ), LIT64( 0xFFFFFE0000000000 ) }, + { LIT64( 0x62C3E502146E426D ), LIT64( 0x43F3CAA0DC7DF1A0 ) }, + { LIT64( 0x35CBD32E52BB570E ), LIT64( 0xBCC477CB11C6236C ) }, + { LIT64( 0x6228FFFFFFC00000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3F80000000000000 ), LIT64( 0x0000000080000008 ) }, + { LIT64( 0x41AFFFDFFFFFFFFF ), LIT64( 0xFFFC000000000000 ) }, + { LIT64( 0x496F000000000000 ), LIT64( 0x00000001FFFBFFFF ) }, + { LIT64( 0x3DE09BFE7923A338 ), LIT64( 0xBCC8FBBD7CEC1F4F ) }, + { LIT64( 0x401CFFFFFFFFFFFF ), LIT64( 0xFFFFFFFEFFFFFF80 ) } +}; + +static void time_az_float128_pos( float128 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128_pos[ inputNum ].low; + a.high = inputs_float128_pos[ inputNum 
].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128_pos[ inputNum ].low; + a.high = inputs_float128_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +enum { + INT32_TO_FLOAT32 = 1, + INT32_TO_FLOAT64, +#ifdef FLOATX80 + INT32_TO_FLOATX80, +#endif +#ifdef FLOAT128 + INT32_TO_FLOAT128, +#endif + INT64_TO_FLOAT32, + INT64_TO_FLOAT64, +#ifdef FLOATX80 + INT64_TO_FLOATX80, +#endif +#ifdef FLOAT128 + INT64_TO_FLOAT128, +#endif + FLOAT32_TO_INT32, + FLOAT32_TO_INT32_ROUND_TO_ZERO, + FLOAT32_TO_INT64, + FLOAT32_TO_INT64_ROUND_TO_ZERO, + FLOAT32_TO_FLOAT64, +#ifdef FLOATX80 + FLOAT32_TO_FLOATX80, +#endif +#ifdef FLOAT128 + FLOAT32_TO_FLOAT128, +#endif + FLOAT32_ROUND_TO_INT, + FLOAT32_ADD, + FLOAT32_SUB, + FLOAT32_MUL, + FLOAT32_DIV, + FLOAT32_REM, + FLOAT32_SQRT, + FLOAT32_EQ, + FLOAT32_LE, + FLOAT32_LT, + FLOAT32_EQ_SIGNALING, + FLOAT32_LE_QUIET, + FLOAT32_LT_QUIET, + FLOAT64_TO_INT32, + FLOAT64_TO_INT32_ROUND_TO_ZERO, + FLOAT64_TO_INT64, + FLOAT64_TO_INT64_ROUND_TO_ZERO, + FLOAT64_TO_FLOAT32, +#ifdef FLOATX80 + FLOAT64_TO_FLOATX80, +#endif +#ifdef FLOAT128 + FLOAT64_TO_FLOAT128, +#endif + FLOAT64_ROUND_TO_INT, + FLOAT64_ADD, + FLOAT64_SUB, + FLOAT64_MUL, + FLOAT64_DIV, + FLOAT64_REM, + FLOAT64_SQRT, + FLOAT64_EQ, + FLOAT64_LE, + FLOAT64_LT, + FLOAT64_EQ_SIGNALING, + FLOAT64_LE_QUIET, + FLOAT64_LT_QUIET, +#ifdef FLOATX80 + FLOATX80_TO_INT32, + FLOATX80_TO_INT32_ROUND_TO_ZERO, + FLOATX80_TO_INT64, + FLOATX80_TO_INT64_ROUND_TO_ZERO, + FLOATX80_TO_FLOAT32, + FLOATX80_TO_FLOAT64, +#ifdef FLOAT128 + FLOATX80_TO_FLOAT128, +#endif + FLOATX80_ROUND_TO_INT, + FLOATX80_ADD, + FLOATX80_SUB, + FLOATX80_MUL, + FLOATX80_DIV, + FLOATX80_REM, 
+ FLOATX80_SQRT, + FLOATX80_EQ, + FLOATX80_LE, + FLOATX80_LT, + FLOATX80_EQ_SIGNALING, + FLOATX80_LE_QUIET, + FLOATX80_LT_QUIET, +#endif +#ifdef FLOAT128 + FLOAT128_TO_INT32, + FLOAT128_TO_INT32_ROUND_TO_ZERO, + FLOAT128_TO_INT64, + FLOAT128_TO_INT64_ROUND_TO_ZERO, + FLOAT128_TO_FLOAT32, + FLOAT128_TO_FLOAT64, +#ifdef FLOATX80 + FLOAT128_TO_FLOATX80, +#endif + FLOAT128_ROUND_TO_INT, + FLOAT128_ADD, + FLOAT128_SUB, + FLOAT128_MUL, + FLOAT128_DIV, + FLOAT128_REM, + FLOAT128_SQRT, + FLOAT128_EQ, + FLOAT128_LE, + FLOAT128_LT, + FLOAT128_EQ_SIGNALING, + FLOAT128_LE_QUIET, + FLOAT128_LT_QUIET, +#endif + NUM_FUNCTIONS +}; + +static struct { + char *name; + int8 numInputs; + flag roundingPrecision, roundingMode; + flag tininessMode, tininessModeAtReducedPrecision; +} functions[ NUM_FUNCTIONS ] = { + { 0, 0, 0, 0, 0, 0 }, + { "int32_to_float32", 1, FALSE, TRUE, FALSE, FALSE }, + { "int32_to_float64", 1, FALSE, FALSE, FALSE, FALSE }, +#ifdef FLOATX80 + { "int32_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "int32_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "int64_to_float32", 1, FALSE, TRUE, FALSE, FALSE }, + { "int64_to_float64", 1, FALSE, TRUE, FALSE, FALSE }, +#ifdef FLOATX80 + { "int64_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "int64_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "float32_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float32_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float32_to_float64", 1, FALSE, FALSE, FALSE, FALSE }, +#ifdef FLOATX80 + { "float32_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "float32_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "float32_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_add", 2, FALSE, TRUE, FALSE, FALSE }, + { "float32_sub", 2, 
FALSE, TRUE, FALSE, FALSE }, + { "float32_mul", 2, FALSE, TRUE, TRUE, FALSE }, + { "float32_div", 2, FALSE, TRUE, FALSE, FALSE }, + { "float32_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_sqrt", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float64_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float64_to_float32", 1, FALSE, TRUE, TRUE, FALSE }, +#ifdef FLOATX80 + { "float64_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "float64_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "float64_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_add", 2, FALSE, TRUE, FALSE, FALSE }, + { "float64_sub", 2, FALSE, TRUE, FALSE, FALSE }, + { "float64_mul", 2, FALSE, TRUE, TRUE, FALSE }, + { "float64_div", 2, FALSE, TRUE, FALSE, FALSE }, + { "float64_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_sqrt", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, +#ifdef FLOATX80 + { "floatx80_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "floatx80_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "floatx80_to_int64_round_to_zero", 1, FALSE, FALSE, 
FALSE, FALSE }, + { "floatx80_to_float32", 1, FALSE, TRUE, TRUE, FALSE }, + { "floatx80_to_float64", 1, FALSE, TRUE, TRUE, FALSE }, +#ifdef FLOAT128 + { "floatx80_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "floatx80_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "floatx80_add", 2, TRUE, TRUE, FALSE, TRUE }, + { "floatx80_sub", 2, TRUE, TRUE, FALSE, TRUE }, + { "floatx80_mul", 2, TRUE, TRUE, TRUE, TRUE }, + { "floatx80_div", 2, TRUE, TRUE, FALSE, TRUE }, + { "floatx80_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_sqrt", 1, TRUE, TRUE, FALSE, FALSE }, + { "floatx80_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "float128_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float128_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float128_to_float32", 1, FALSE, TRUE, TRUE, FALSE }, + { "float128_to_float64", 1, FALSE, TRUE, TRUE, FALSE }, +#ifdef FLOATX80 + { "float128_to_floatx80", 1, FALSE, TRUE, TRUE, FALSE }, +#endif + { "float128_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_add", 2, FALSE, TRUE, FALSE, FALSE }, + { "float128_sub", 2, FALSE, TRUE, FALSE, FALSE }, + { "float128_mul", 2, FALSE, TRUE, TRUE, FALSE }, + { "float128_div", 2, FALSE, TRUE, FALSE, FALSE }, + { "float128_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_sqrt", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { 
"float128_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, +#endif +}; + +enum { + ROUND_NEAREST_EVEN = 1, + ROUND_TO_ZERO, + ROUND_DOWN, + ROUND_UP, + NUM_ROUNDINGMODES +}; +enum { + TININESS_BEFORE_ROUNDING = 1, + TININESS_AFTER_ROUNDING, + NUM_TININESSMODES +}; + +static void + timeFunctionVariety( + uint8 functionCode, + int8 roundingPrecision, + int8 roundingMode, + int8 tininessMode + ) +{ + uint8 roundingCode; + int8 tininessCode; + + functionName = functions[ functionCode ].name; + if ( roundingPrecision == 32 ) { + roundingPrecisionName = "32"; + } + else if ( roundingPrecision == 64 ) { + roundingPrecisionName = "64"; + } + else if ( roundingPrecision == 80 ) { + roundingPrecisionName = "80"; + } + else { + roundingPrecisionName = 0; + } +#ifdef FLOATX80 + floatx80_rounding_precision = roundingPrecision; +#endif + switch ( roundingMode ) { + case 0: + roundingModeName = 0; + roundingCode = float_round_nearest_even; + break; + case ROUND_NEAREST_EVEN: + roundingModeName = "nearest_even"; + roundingCode = float_round_nearest_even; + break; + case ROUND_TO_ZERO: + roundingModeName = "to_zero"; + roundingCode = float_round_to_zero; + break; + case ROUND_DOWN: + roundingModeName = "down"; + roundingCode = float_round_down; + break; + case ROUND_UP: + roundingModeName = "up"; + roundingCode = float_round_up; + break; + } + float_rounding_mode = roundingCode; + switch ( tininessMode ) { + case 0: + tininessModeName = 0; + tininessCode = float_tininess_after_rounding; + break; + case TININESS_BEFORE_ROUNDING: + tininessModeName = "before"; + tininessCode = float_tininess_before_rounding; + break; + case TININESS_AFTER_ROUNDING: + tininessModeName = "after"; + tininessCode = float_tininess_after_rounding; + break; + } + float_detect_tininess = tininessCode; + switch ( functionCode ) { + case INT32_TO_FLOAT32: + time_a_int32_z_float32( int32_to_float32 ); + break; + case INT32_TO_FLOAT64: + 
time_a_int32_z_float64( int32_to_float64 ); + break; +#ifdef FLOATX80 + case INT32_TO_FLOATX80: + time_a_int32_z_floatx80( int32_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case INT32_TO_FLOAT128: + time_a_int32_z_float128( int32_to_float128 ); + break; +#endif + case INT64_TO_FLOAT32: + time_a_int64_z_float32( int64_to_float32 ); + break; + case INT64_TO_FLOAT64: + time_a_int64_z_float64( int64_to_float64 ); + break; +#ifdef FLOATX80 + case INT64_TO_FLOATX80: + time_a_int64_z_floatx80( int64_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case INT64_TO_FLOAT128: + time_a_int64_z_float128( int64_to_float128 ); + break; +#endif + case FLOAT32_TO_INT32: + time_a_float32_z_int32( float32_to_int32 ); + break; + case FLOAT32_TO_INT32_ROUND_TO_ZERO: + time_a_float32_z_int32( float32_to_int32_round_to_zero ); + break; + case FLOAT32_TO_INT64: + time_a_float32_z_int64( float32_to_int64 ); + break; + case FLOAT32_TO_INT64_ROUND_TO_ZERO: + time_a_float32_z_int64( float32_to_int64_round_to_zero ); + break; + case FLOAT32_TO_FLOAT64: + time_a_float32_z_float64( float32_to_float64 ); + break; +#ifdef FLOATX80 + case FLOAT32_TO_FLOATX80: + time_a_float32_z_floatx80( float32_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case FLOAT32_TO_FLOAT128: + time_a_float32_z_float128( float32_to_float128 ); + break; +#endif + case FLOAT32_ROUND_TO_INT: + time_az_float32( float32_round_to_int ); + break; + case FLOAT32_ADD: + time_abz_float32( float32_add ); + break; + case FLOAT32_SUB: + time_abz_float32( float32_sub ); + break; + case FLOAT32_MUL: + time_abz_float32( float32_mul ); + break; + case FLOAT32_DIV: + time_abz_float32( float32_div ); + break; + case FLOAT32_REM: + time_abz_float32( float32_rem ); + break; + case FLOAT32_SQRT: + time_az_float32_pos( float32_sqrt ); + break; + case FLOAT32_EQ: + time_ab_float32_z_flag( float32_eq ); + break; + case FLOAT32_LE: + time_ab_float32_z_flag( float32_le ); + break; + case FLOAT32_LT: + time_ab_float32_z_flag( float32_lt ); 
+ break; + case FLOAT32_EQ_SIGNALING: + time_ab_float32_z_flag( float32_eq_signaling ); + break; + case FLOAT32_LE_QUIET: + time_ab_float32_z_flag( float32_le_quiet ); + break; + case FLOAT32_LT_QUIET: + time_ab_float32_z_flag( float32_lt_quiet ); + break; + case FLOAT64_TO_INT32: + time_a_float64_z_int32( float64_to_int32 ); + break; + case FLOAT64_TO_INT32_ROUND_TO_ZERO: + time_a_float64_z_int32( float64_to_int32_round_to_zero ); + break; + case FLOAT64_TO_INT64: + time_a_float64_z_int64( float64_to_int64 ); + break; + case FLOAT64_TO_INT64_ROUND_TO_ZERO: + time_a_float64_z_int64( float64_to_int64_round_to_zero ); + break; + case FLOAT64_TO_FLOAT32: + time_a_float64_z_float32( float64_to_float32 ); + break; +#ifdef FLOATX80 + case FLOAT64_TO_FLOATX80: + time_a_float64_z_floatx80( float64_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case FLOAT64_TO_FLOAT128: + time_a_float64_z_float128( float64_to_float128 ); + break; +#endif + case FLOAT64_ROUND_TO_INT: + time_az_float64( float64_round_to_int ); + break; + case FLOAT64_ADD: + time_abz_float64( float64_add ); + break; + case FLOAT64_SUB: + time_abz_float64( float64_sub ); + break; + case FLOAT64_MUL: + time_abz_float64( float64_mul ); + break; + case FLOAT64_DIV: + time_abz_float64( float64_div ); + break; + case FLOAT64_REM: + time_abz_float64( float64_rem ); + break; + case FLOAT64_SQRT: + time_az_float64_pos( float64_sqrt ); + break; + case FLOAT64_EQ: + time_ab_float64_z_flag( float64_eq ); + break; + case FLOAT64_LE: + time_ab_float64_z_flag( float64_le ); + break; + case FLOAT64_LT: + time_ab_float64_z_flag( float64_lt ); + break; + case FLOAT64_EQ_SIGNALING: + time_ab_float64_z_flag( float64_eq_signaling ); + break; + case FLOAT64_LE_QUIET: + time_ab_float64_z_flag( float64_le_quiet ); + break; + case FLOAT64_LT_QUIET: + time_ab_float64_z_flag( float64_lt_quiet ); + break; +#ifdef FLOATX80 + case FLOATX80_TO_INT32: + time_a_floatx80_z_int32( floatx80_to_int32 ); + break; + case 
FLOATX80_TO_INT32_ROUND_TO_ZERO: + time_a_floatx80_z_int32( floatx80_to_int32_round_to_zero ); + break; + case FLOATX80_TO_INT64: + time_a_floatx80_z_int64( floatx80_to_int64 ); + break; + case FLOATX80_TO_INT64_ROUND_TO_ZERO: + time_a_floatx80_z_int64( floatx80_to_int64_round_to_zero ); + break; + case FLOATX80_TO_FLOAT32: + time_a_floatx80_z_float32( floatx80_to_float32 ); + break; + case FLOATX80_TO_FLOAT64: + time_a_floatx80_z_float64( floatx80_to_float64 ); + break; +#ifdef FLOAT128 + case FLOATX80_TO_FLOAT128: + time_a_floatx80_z_float128( floatx80_to_float128 ); + break; +#endif + case FLOATX80_ROUND_TO_INT: + time_az_floatx80( floatx80_round_to_int ); + break; + case FLOATX80_ADD: + time_abz_floatx80( floatx80_add ); + break; + case FLOATX80_SUB: + time_abz_floatx80( floatx80_sub ); + break; + case FLOATX80_MUL: + time_abz_floatx80( floatx80_mul ); + break; + case FLOATX80_DIV: + time_abz_floatx80( floatx80_div ); + break; + case FLOATX80_REM: + time_abz_floatx80( floatx80_rem ); + break; + case FLOATX80_SQRT: + time_az_floatx80_pos( floatx80_sqrt ); + break; + case FLOATX80_EQ: + time_ab_floatx80_z_flag( floatx80_eq ); + break; + case FLOATX80_LE: + time_ab_floatx80_z_flag( floatx80_le ); + break; + case FLOATX80_LT: + time_ab_floatx80_z_flag( floatx80_lt ); + break; + case FLOATX80_EQ_SIGNALING: + time_ab_floatx80_z_flag( floatx80_eq_signaling ); + break; + case FLOATX80_LE_QUIET: + time_ab_floatx80_z_flag( floatx80_le_quiet ); + break; + case FLOATX80_LT_QUIET: + time_ab_floatx80_z_flag( floatx80_lt_quiet ); + break; +#endif +#ifdef FLOAT128 + case FLOAT128_TO_INT32: + time_a_float128_z_int32( float128_to_int32 ); + break; + case FLOAT128_TO_INT32_ROUND_TO_ZERO: + time_a_float128_z_int32( float128_to_int32_round_to_zero ); + break; + case FLOAT128_TO_INT64: + time_a_float128_z_int64( float128_to_int64 ); + break; + case FLOAT128_TO_INT64_ROUND_TO_ZERO: + time_a_float128_z_int64( float128_to_int64_round_to_zero ); + break; + case FLOAT128_TO_FLOAT32: + 
time_a_float128_z_float32( float128_to_float32 ); + break; + case FLOAT128_TO_FLOAT64: + time_a_float128_z_float64( float128_to_float64 ); + break; +#ifdef FLOATX80 + case FLOAT128_TO_FLOATX80: + time_a_float128_z_floatx80( float128_to_floatx80 ); + break; +#endif + case FLOAT128_ROUND_TO_INT: + time_az_float128( float128_round_to_int ); + break; + case FLOAT128_ADD: + time_abz_float128( float128_add ); + break; + case FLOAT128_SUB: + time_abz_float128( float128_sub ); + break; + case FLOAT128_MUL: + time_abz_float128( float128_mul ); + break; + case FLOAT128_DIV: + time_abz_float128( float128_div ); + break; + case FLOAT128_REM: + time_abz_float128( float128_rem ); + break; + case FLOAT128_SQRT: + time_az_float128_pos( float128_sqrt ); + break; + case FLOAT128_EQ: + time_ab_float128_z_flag( float128_eq ); + break; + case FLOAT128_LE: + time_ab_float128_z_flag( float128_le ); + break; + case FLOAT128_LT: + time_ab_float128_z_flag( float128_lt ); + break; + case FLOAT128_EQ_SIGNALING: + time_ab_float128_z_flag( float128_eq_signaling ); + break; + case FLOAT128_LE_QUIET: + time_ab_float128_z_flag( float128_le_quiet ); + break; + case FLOAT128_LT_QUIET: + time_ab_float128_z_flag( float128_lt_quiet ); + break; +#endif + } + +} + +static void + timeFunction( + uint8 functionCode, + int8 roundingPrecisionIn, + int8 roundingModeIn, + int8 tininessModeIn + ) +{ + int8 roundingPrecision, roundingMode, tininessMode; + + roundingPrecision = 32; + for (;;) { + if ( ! functions[ functionCode ].roundingPrecision ) { + roundingPrecision = 0; + } + else if ( roundingPrecisionIn ) { + roundingPrecision = roundingPrecisionIn; + } + for ( roundingMode = 1; + roundingMode < NUM_ROUNDINGMODES; + ++roundingMode + ) { + if ( ! 
functions[ functionCode ].roundingMode ) { + roundingMode = 0; + } + else if ( roundingModeIn ) { + roundingMode = roundingModeIn; + } + for ( tininessMode = 1; + tininessMode < NUM_TININESSMODES; + ++tininessMode + ) { + if ( ( roundingPrecision == 32 ) + || ( roundingPrecision == 64 ) ) { + if ( ! functions[ functionCode ] + .tininessModeAtReducedPrecision + ) { + tininessMode = 0; + } + else if ( tininessModeIn ) { + tininessMode = tininessModeIn; + } + } + else { + if ( ! functions[ functionCode ].tininessMode ) { + tininessMode = 0; + } + else if ( tininessModeIn ) { + tininessMode = tininessModeIn; + } + } + timeFunctionVariety( + functionCode, roundingPrecision, roundingMode, tininessMode + ); + if ( tininessModeIn || ! tininessMode ) break; + } + if ( roundingModeIn || ! roundingMode ) break; + } + if ( roundingPrecisionIn || ! roundingPrecision ) break; + if ( roundingPrecision == 80 ) { + break; + } + else if ( roundingPrecision == 64 ) { + roundingPrecision = 80; + } + else if ( roundingPrecision == 32 ) { + roundingPrecision = 64; + } + } + +} + +main( int argc, char **argv ) +{ + char *argPtr; + flag functionArgument; + uint8 functionCode; + int8 operands, roundingPrecision, roundingMode, tininessMode; + + if ( argc <= 1 ) goto writeHelpMessage; + functionArgument = FALSE; + functionCode = 0; + operands = 0; + roundingPrecision = 0; + roundingMode = 0; + tininessMode = 0; + --argc; + ++argv; + while ( argc && ( argPtr = argv[ 0 ] ) ) { + if ( argPtr[ 0 ] == '-' ) ++argPtr; + if ( strcmp( argPtr, "help" ) == 0 ) { + writeHelpMessage: + fputs( +"timesoftfloat [