diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpeq.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpeq.c new file mode 100644 index 0000000000..8bde7a5489 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpeq.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +int __aeabi_dcmpeq(float64, float64); + +int +__aeabi_dcmpeq(float64 a, float64 b) +{ + /* Double-precision equality helper: hands a and b to float64_eq() and returns its int result unchanged. */ + return float64_eq(a, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpge.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpge.c new file mode 100644 index 0000000000..c153feb6a8 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpge.c @@ -0,0 +1,35 @@ +/* $NetBSD: __aeabi_dcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution.
The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmpge(float64, float64); + +int +__aeabi_dcmpge(float64 a, float64 b) +{ + /* Non-zero only when float64_lt(a, b) is zero and both self-comparisons float64_eq(a, a) and float64_eq(b, b) are non-zero; && short-circuits, so later calls are skipped once one test fails. The self-comparisons presumably reject NaN operands - TODO confirm against float64_eq. */ + return !float64_lt(a, b) && float64_eq(a, a) && float64_eq(b, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpgt.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpgt.c new file mode 100644 index 0000000000..5fb1606697 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpgt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmpgt(float64, float64); + +int +__aeabi_dcmpgt(float64 a, float64 b) +{ + /* Non-zero only when float64_le(a, b) is zero and both self-comparisons float64_eq(a, a) and float64_eq(b, b) are non-zero; && short-circuits, so later calls are skipped once one test fails. The self-comparisons presumably reject NaN operands - TODO confirm against float64_eq. */ + return !float64_le(a, b) && float64_eq(a, a) && float64_eq(b, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmple.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmple.c new file mode 100644 index 0000000000..a8327c5e5b --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmple.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmple(float64, float64); + +int +__aeabi_dcmple(float64 a, float64 b) +{ + /* Double-precision less-than-or-equal helper: hands a and b to float64_le() and returns its int result unchanged. */ + return float64_le(a, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmplt.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmplt.c new file mode 100644 index 0000000000..8d0e143cb4 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmplt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_dcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmplt(float64, float64); + +int +__aeabi_dcmplt(float64 a, float64 b) +{ + /* Double-precision less-than helper: hands a and b to float64_lt() and returns its int result unchanged. */ + return float64_lt(a, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpun.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpun.c new file mode 100644 index 0000000000..fa91120a6c --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_dcmpun.c @@ -0,0 +1,42 @@ +/* $NetBSD: __aeabi_dcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Richard Earnshaw, 2003. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_dcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_dcmpun(float64, float64); + +int +__aeabi_dcmpun(float64 a, float64 b) +{ + /* + * The comparison is unordered if either input is a NaN. + * Test for this by comparing each operand with itself. + * We must perform both comparisons to correctly check for + * signalling NaNs.
+ */ + return !float64_eq(a, a) | !float64_eq(b, b); /* bitwise |, not ||: no short-circuit, so b is compared with itself even when a already tested as NaN */ +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpeq.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpeq.c new file mode 100644 index 0000000000..83db09e6a4 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpeq.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpeq.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +int __aeabi_fcmpeq(float32, float32); + +int +__aeabi_fcmpeq(float32 a, float32 b) +{ + /* Single-precision equality helper: hands a and b to float32_eq() and returns its int result unchanged. */ + return float32_eq(a, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpge.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpge.c new file mode 100644 index 0000000000..db59a98822 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpge.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution.
The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpge.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmpge(float32, float32); + +int +__aeabi_fcmpge(float32 a, float32 b) +{ + /* Non-zero only when float32_lt(a, b) is zero and both self-comparisons float32_eq(a, a) and float32_eq(b, b) are non-zero; && short-circuits, so later calls are skipped once one test fails. The self-comparisons presumably reject NaN operands - TODO confirm against float32_eq. */ + return !float32_lt(a, b) && float32_eq(a, a) && float32_eq(b, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpgt.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpgt.c new file mode 100644 index 0000000000..6d6dea6088 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpgt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpgt.c,v 1.2 2013/04/16 13:38:34 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmpgt(float32, float32); + +int +__aeabi_fcmpgt(float32 a, float32 b) +{ + /* Non-zero only when float32_le(a, b) is zero and both self-comparisons float32_eq(a, a) and float32_eq(b, b) are non-zero; && short-circuits, so later calls are skipped once one test fails. The self-comparisons presumably reject NaN operands - TODO confirm against float32_eq. */ + return !float32_le(a, b) && float32_eq(a, a) && float32_eq(b, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmple.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmple.c new file mode 100644 index 0000000000..84c0355e2e --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmple.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmple.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmple(float32, float32); + +int +__aeabi_fcmple(float32 a, float32 b) +{ + /* Single-precision less-than-or-equal helper: hands a and b to float32_le() and returns its int result unchanged. */ + return float32_le(a, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmplt.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmplt.c new file mode 100644 index 0000000000..a421e8ce21 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmplt.c @@ -0,0 +1,37 @@ +/* $NetBSD: __aeabi_fcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain.
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmplt.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmplt(float32, float32); + +int +__aeabi_fcmplt(float32 a, float32 b) +{ + /* Single-precision less-than helper: hands a and b to float32_lt() and returns its int result unchanged. */ + return float32_lt(a, b); +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpun.c b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpun.c new file mode 100644 index 0000000000..403afba17e --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/__aeabi_fcmpun.c @@ -0,0 +1,42 @@ +/* $NetBSD: __aeabi_fcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $ */ + +/** @file +* +* Copyright (c) 2013 - 2014, ARM Limited. All rights reserved. +* +* This program and the accompanying materials +* are licensed and made available under the terms and conditions of the BSD License +* which accompanies this distribution. The full text of the license may be found at +* http://opensource.org/licenses/bsd-license.php +* +* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +* +**/ + +/* + * Written by Richard Earnshaw, 2003. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: __aeabi_fcmpun.c,v 1.1 2013/04/16 10:37:39 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +int __aeabi_fcmpun(float32, float32); + +int +__aeabi_fcmpun(float32 a, float32 b) +{ + /* + * The comparison is unordered if either input is a NaN. + * Test for this by comparing each operand with itself. + * We must perform both comparisons to correctly check for + * signalling NaNs.
+ */ + return !float32_eq(a, a) | !float32_eq(b, b); /* bitwise |, not ||: no short-circuit, so b is compared with itself even when a already tested as NaN */ +} diff --git a/ArmPkg/Library/ArmSoftFloatLib/Arm/softfloat.h b/ArmPkg/Library/ArmSoftFloatLib/Arm/softfloat.h new file mode 100644 index 0000000000..a9004f6723 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/Arm/softfloat.h @@ -0,0 +1,345 @@ +/* $NetBSD: softfloat.h,v 1.10 2013/04/24 18:04:46 matt Exp $ */ + +/* This is a derivative work. */ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained.
+ +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. The same applies to +the `FLOAT128' macro and the quadruple-precision format `float128'. +------------------------------------------------------------------------------- +*/ +/* #define FLOATX80 */ +/* #define FLOAT128 */ + +#define FE_INVALID 0x01 /* invalid operation exception */ +#define FE_DIVBYZERO 0x02 /* divide-by-zero exception */ +#define FE_OVERFLOW 0x04 /* overflow exception */ +#define FE_UNDERFLOW 0x08 /* underflow exception */ +#define FE_INEXACT 0x10 /* imprecise (loss of precision; "inexact") */ + +#define FE_ALL_EXCEPT 0x1f + +#define FE_TONEAREST 0 /* round to nearest representable number */ +#define FE_UPWARD 1 /* round toward positive infinity */ +#define FE_DOWNWARD 2 /* round toward negative infinity */ +#define FE_TOWARDZERO 3 /* round to zero (truncate) */ + +typedef int fp_except; + +/* Bit defines for fp_except */ + +#define FP_X_INV FE_INVALID /* invalid operation exception */ +#define FP_X_DZ FE_DIVBYZERO /* divide-by-zero exception */ +#define FP_X_OFL FE_OVERFLOW /* overflow exception */ +#define FP_X_UFL FE_UNDERFLOW /* underflow exception */ +#define FP_X_IMP FE_INEXACT /* imprecise (prec. 
loss; "inexact") */ + +/* Rounding modes */ + +typedef enum { + FP_RN=FE_TONEAREST, /* round to nearest representable number */ + FP_RP=FE_UPWARD, /* round toward positive infinity */ + FP_RM=FE_DOWNWARD, /* round toward negative infinity */ + FP_RZ=FE_TOWARDZERO /* round to zero (truncate) */ +} fp_rnd; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. +------------------------------------------------------------------------------- +*/ +typedef unsigned int float32; +typedef unsigned long long float64; +#ifdef FLOATX80 +typedef struct { + unsigned short high; + unsigned long long low; +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { + unsigned long long high, low; +} float128; +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode. +------------------------------------------------------------------------------- +*/ +#ifndef SOFTFLOAT_FOR_GCC +extern int float_detect_tininess; +#endif +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. +------------------------------------------------------------------------------- +*/ +extern fp_rnd float_rounding_mode; +#define float_round_nearest_even FP_RN +#define float_round_to_zero FP_RZ +#define float_round_down FP_RM +#define float_round_up FP_RP + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. 
+------------------------------------------------------------------------------- +*/ +extern fp_except float_exception_flags; +extern fp_except float_exception_mask; +enum { + float_flag_inexact = FP_X_IMP, + float_flag_underflow = FP_X_UFL, + float_flag_overflow = FP_X_OFL, + float_flag_divbyzero = FP_X_DZ, + float_flag_invalid = FP_X_INV +}; + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. +------------------------------------------------------------------------------- +*/ +void float_raise( fp_except ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int32 ); +float32 uint32_to_float32( uint32 ); +float64 int32_to_float64( int32 ); +float64 uint32_to_float64( uint32 ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( int32 ); +floatx80 uint32_to_floatx80( uint32 ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( int32 ); +float128 uint32_to_float128( uint32 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ +float32 int64_to_float32( long long ); +float64 int64_to_float64( long long ); +#ifdef FLOATX80 +floatx80 int64_to_floatx80( long long ); +#endif +#ifdef FLOAT128 +float128 int64_to_float128( long long ); +#endif +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. 
+------------------------------------------------------------------------------- +*/ +int float32_to_int32( float32 ); +int float32_to_int32_round_to_zero( float32 ); +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) +unsigned int float32_to_uint32_round_to_zero( float32 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +long long float32_to_int64( float32 ); +long long float32_to_int64_round_to_zero( float32 ); +#endif +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. +------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +int float32_eq( float32, float32 ); +int float32_le( float32, float32 ); +int float32_lt( float32, float32 ); +int float32_eq_signaling( float32, float32 ); +int float32_le_quiet( float32, float32 ); +int float32_lt_quiet( float32, float32 ); +#ifndef SOFTFLOAT_FOR_GCC +int float32_is_signaling_nan( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. 
+------------------------------------------------------------------------------- +*/ +int float64_to_int32( float64 ); +int float64_to_int32_round_to_zero( float64 ); +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) +unsigned int float64_to_uint32_round_to_zero( float64 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +long long float64_to_int64( float64 ); +long long float64_to_int64_round_to_zero( float64 ); +#endif +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +int float64_eq( float64, float64 ); +int float64_le( float64, float64 ); +int float64_lt( float64, float64 ); +int float64_eq_signaling( float64, float64 ); +int float64_le_quiet( float64, float64 ); +int float64_lt_quiet( float64, float64 ); +#ifndef SOFTFLOAT_FOR_GCC +int float64_is_signaling_nan( float64 ); +#endif + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. 
+------------------------------------------------------------------------------- +*/ +int floatx80_to_int32( floatx80 ); +int floatx80_to_int32_round_to_zero( floatx80 ); +long long floatx80_to_int64( floatx80 ); +long long floatx80_to_int64_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern int floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +int floatx80_eq( floatx80, floatx80 ); +int floatx80_le( floatx80, floatx80 ); +int floatx80_lt( floatx80, floatx80 ); +int floatx80_eq_signaling( floatx80, floatx80 ); +int floatx80_le_quiet( floatx80, floatx80 ); +int floatx80_lt_quiet( floatx80, floatx80 ); +int floatx80_is_signaling_nan( floatx80 ); + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision conversion routines. 
+------------------------------------------------------------------------------- +*/ +int float128_to_int32( float128 ); +int float128_to_int32_round_to_zero( float128 ); +long long float128_to_int64( float128 ); +long long float128_to_int64_round_to_zero( float128 ); +float32 float128_to_float32( float128 ); +float64 float128_to_float64( float128 ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision operations. +------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 ); +float128 float128_add( float128, float128 ); +float128 float128_sub( float128, float128 ); +float128 float128_mul( float128, float128 ); +float128 float128_div( float128, float128 ); +float128 float128_rem( float128, float128 ); +float128 float128_sqrt( float128 ); +int float128_eq( float128, float128 ); +int float128_le( float128, float128 ); +int float128_lt( float128, float128 ); +int float128_eq_signaling( float128, float128 ); +int float128_le_quiet( float128, float128 ); +int float128_lt_quiet( float128, float128 ); +int float128_is_signaling_nan( float128 ); + +#endif diff --git a/ArmPkg/Library/ArmSoftFloatLib/ArmSoftFloatLib.inf b/ArmPkg/Library/ArmSoftFloatLib/ArmSoftFloatLib.inf new file mode 100644 index 0000000000..7fefb107a8 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/ArmSoftFloatLib.inf @@ -0,0 +1,48 @@ +## @file +# ARM Software floating point Library. +# +# Copyright (c) 2014, ARM Ltd. All rights reserved. +# Copyright (c) 2015, Linaro Ltd. All rights reserved. +# +# This program and the accompanying materials +# are licensed and made available under the terms and conditions of the BSD License +# which accompanies this distribution. The full text of the license may be found at +# http://opensource.org/licenses/bsd-license.php. 
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. +# +# +## + +[Defines] + INF_VERSION = 0x00010005 + BASE_NAME = ArmSoftFloatLib + FILE_GUID = a485f921-749e-41a0-9f91-62f09a38721c + MODULE_TYPE = BASE + VERSION_STRING = 1.0 + LIBRARY_CLASS = ArmSoftFloatLib + +# +# VALID_ARCHITECTURES = ARM +# + +[Sources] + bits32/softfloat.c + Arm/__aeabi_dcmpeq.c + Arm/__aeabi_fcmpeq.c + Arm/__aeabi_dcmpge.c + Arm/__aeabi_fcmpge.c + Arm/__aeabi_dcmpgt.c + Arm/__aeabi_fcmpgt.c + Arm/__aeabi_dcmple.c + Arm/__aeabi_fcmple.c + Arm/__aeabi_dcmplt.c + Arm/__aeabi_fcmplt.c + Arm/__aeabi_dcmpun.c + Arm/__aeabi_fcmpun.c + +[Packages] + MdePkg/MdePkg.dec + +[BuildOptions] + GCC:*_*_*_CC_FLAGS = -DSOFTFLOAT_FOR_GCC -Wno-enum-compare -fno-tree-vrp diff --git a/ArmPkg/Library/ArmSoftFloatLib/arm-gcc.h b/ArmPkg/Library/ArmSoftFloatLib/arm-gcc.h new file mode 100644 index 0000000000..8cd4989998 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/arm-gcc.h @@ -0,0 +1,114 @@ +/** @file + + Copyright (c) 2014, ARM Limited. All rights reserved. + + This program and the accompanying materials + are licensed and made available under the terms and conditions of the BSD License + which accompanies this distribution. The full text of the license may be found at + http://opensource.org/licenses/bsd-license.php + + THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. + +**/ + +/* $NetBSD: arm-gcc.h,v 1.4 2013/01/26 07:08:14 matt Exp $ */ + +/* +------------------------------------------------------------------------------- +One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. 
+------------------------------------------------------------------------------- +*/ +#ifdef __ARMEB__ +#define BIGENDIAN +#else +#define LITTLEENDIAN +#endif + +/* +------------------------------------------------------------------------------- +The macro `BITS64' can be defined to indicate that 64-bit integer types are +supported by the compiler. +------------------------------------------------------------------------------- +*/ +#define BITS64 + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines the most convenient type that holds +integers of at least as many bits as specified. For example, `uint8' should +be the most convenient type that can hold unsigned integers of as many as +8 bits. The `flag' type must be able to hold either a 0 or 1. For most +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +to the same as `int'. +------------------------------------------------------------------------------- +*/ +typedef int flag; +typedef int uint8; +typedef int int8; +typedef int uint16; /* note: like uint8, this unsigned-named type is plain (signed) int here */ +typedef int int16; +typedef unsigned int uint32; +typedef signed int int32; +#ifdef BITS64 +typedef unsigned long long int uint64; +typedef signed long long int int64; +#endif + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines a type that holds integers +of _exactly_ the number of bits specified. For instance, for most +implementations of C, `bits16' and `sbits16' should be `typedef'ed to +`unsigned short int' and `signed short int' (or `short int'), respectively.
+------------------------------------------------------------------------------- +*/ +typedef unsigned char bits8; +typedef signed char sbits8; +typedef unsigned short int bits16; +typedef signed short int sbits16; +typedef unsigned int bits32; +typedef signed int sbits32; +#ifdef BITS64 +typedef unsigned long long int bits64; +typedef signed long long int sbits64; +#endif + +#ifdef BITS64 +/* +------------------------------------------------------------------------------- +The `LIT64' macro takes as its argument a textual integer literal and +if necessary ``marks'' the literal as having a 64-bit integer type. +For example, the GNU C Compiler (`gcc') requires that 64-bit literals be +appended with the letters `LL' standing for `long long', which is `gcc's +name for the 64-bit integer type. Some compilers may allow `LIT64' to be +defined as the identity macro: `#define LIT64( a ) a'. +------------------------------------------------------------------------------- +*/ +#define LIT64( a ) a##ULL +#endif + +/* +------------------------------------------------------------------------------- +The macro `INLINE' can be used before functions that should be inlined. If +a compiler does not support explicit inlining, this macro should be defined +to be `static'. +------------------------------------------------------------------------------- +*/ +#define INLINE static inline + +/* +------------------------------------------------------------------------------- +The ARM FPA is odd in that it stores doubles high-order word first, no matter +what the endianness of the CPU. VFP is sane. 
+------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) +#if defined(__VFP_FP__) || defined(__ARMEB__) +#define FLOAT64_DEMANGLE(a) (a) +#define FLOAT64_MANGLE(a) (a) +#else +#define FLOAT64_DEMANGLE(a) (((a) << 32) | ((a) >> 32)) +#define FLOAT64_MANGLE(a) FLOAT64_DEMANGLE(a) +#endif +#endif diff --git a/ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat-macros b/ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat-macros new file mode 100644 index 0000000000..8e1f2d8b9a --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat-macros @@ -0,0 +1,648 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these four paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'. If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1. The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 32, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 kept, sticky;
+
+    if ( count == 0 ) {
+        /* No shift requested: hand the value back untouched. */
+        *zPtr = a;
+        return;
+    }
+    if ( ! ( count < 32 ) ) {
+        /* Every bit is shifted out; only the sticky bit can remain. */
+        *zPtr = ( a != 0 );
+        return;
+    }
+    /* Bits that survive the shift ... */
+    kept = a>>count;
+    /* ... and a 0/1 flag saying whether any discarded bit was set. */
+    sticky = ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    *zPtr = kept | sticky;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'. Any bits shifted off are lost. The value
+of `count' can be arbitrarily large; in particular, if `count' is greater
+than 64, the result will be 0. The result is broken into two 32-bit pieces
+which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift64Right(
+     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
+{
+    bits32 z0, z1;
+    int8 negCount = ( - count ) & 31; /* complementary shift: ( 32 - count ) mod 32 */
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 32 ) {
+        z1 = ( a0<>count ); /* NOTE(review): `a0<>count' is not valid C; the text between `<' and `>' appears to have been lost in extraction (negCount is otherwise unused) -- restore from upstream */
+        z0 = a0>>count;
+    }
+    else {
+        z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0; /* 32..63: only the high word contributes; 64 or more: result is 0 */
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'. If any nonzero bits are shifted off, they
+are ``jammed'' into the least significant bit of the result by setting the
+least significant bit to 1. The value of `count' can be arbitrarily large;
+in particular, if `count' is greater than 64, the result will be either 0
+or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+nonzero. The result is broken into two 32-bit pieces which are stored at
+the locations pointed to by `z0Ptr' and `z1Ptr'.
+
+NOTE(review): the body below is damaged. Several shift expressions are
+truncated, and from the second truncation onward the statements refer to
+`z2', `a2' and `z2Ptr', none of which this function declares -- it looks as
+if the end of this function and the start of a following three-word variant
+were lost in extraction. Restore from upstream before use.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift64RightJamming(
+     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
+{
+    bits32 z0, z1;
+    int8 negCount = ( - count ) & 31;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 32 ) {
+        z1 = ( a0<>count ) | ( ( a1<>count; /* NOTE(review): truncated expression */
+    }
+    else {
+        if ( count == 32 ) {
+            z1 = a0 | ( a1 != 0 ); /* whole low word shifted out: jam it into bit 0 */
+        }
+        else if ( count < 64 ) {
+            z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<>count ); /* NOTE(review): truncated expression; text after this point presumably belongs to another function */
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 32 ) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if ( count < 64 ) {
+                    z2 = a0<>( count & 31 ); /* NOTE(review): truncated expression */
+                }
+                else {
+                    z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 ); /* jam: any set bit left in a2 forces bit 0 of z2 to 1 */
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
+number of bits given in `count'. Any bits shifted off are lost. The value
+of `count' must be less than 32. The result is broken into two 32-bit
+pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift64Left(
+     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
+{
+
+    *z1Ptr = a1<>( ( - count ) & 31 ) ); /* NOTE(review): truncated -- no assignment to *z0Ptr is visible and the parentheses do not balance; restore from upstream */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
+by the number of bits given in `count'. Any bits shifted off are lost.
+The value of `count' must be less than 32. The result is broken into three
+32-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift96Left(
+     bits32 a0,
+     bits32 a1,
+     bits32 a2,
+     int16 count,
+     bits32 *z0Ptr,
+     bits32 *z1Ptr,
+     bits32 *z2Ptr
+ )
+{
+    bits32 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<>negCount; /* NOTE(review): truncated -- negCount is read here without any visible assignment, and the opening of the block closed below is missing */
+        z0 |= a1>>negCount;
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
+value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
+any carry out is lost. The result is broken into two 32-bit pieces which
+are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add64(
+     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
+{
+    /* Low words first; an unsigned wrap-around shows up as sum < addend. */
+    const bits32 lowSum = a1 + b1;
+
+    *z1Ptr = lowSum;
+    *z0Ptr = a0 + b0 + ( lowSum < a1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
+96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
+modulo 2^96, so any carry out is lost. The result is broken into three
+32-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add96(
+     bits32 a0,
+     bits32 a1,
+     bits32 a2,
+     bits32 b0,
+     bits32 b1,
+     bits32 b2,
+     bits32 *z0Ptr,
+     bits32 *z1Ptr,
+     bits32 *z2Ptr
+ )
+{
+    bits32 high, mid, low;
+    int8 carryOutOfLow, carryOutOfMid;
+
+    /* Least significant word and the carry it produces. */
+    low = a2 + b2;
+    carryOutOfLow = ( low < a2 );
+
+    /* Middle word: first the plain sum, then the incoming carry. */
+    mid = a1 + b1;
+    carryOutOfMid = ( mid < a1 );
+    mid += carryOutOfLow;
+
+    /* Most significant word absorbs both possible carries out of `mid'. */
+    high = a0 + b0 + ( mid < (bits32)carryOutOfLow ) + carryOutOfMid;
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = high;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
+64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
+2^64, so any borrow out (carry out) is lost. The result is broken into two
+32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+`z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub64(
+     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
+{
+    /* A borrow is needed exactly when the low minuend is the smaller word. */
+    const int8 borrow = ( a1 < b1 );
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - borrow;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
+the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
+is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
+into three 32-bit pieces which are stored at the locations pointed to by
+`z0Ptr', `z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub96(
+     bits32 a0,
+     bits32 a1,
+     bits32 a2,
+     bits32 b0,
+     bits32 b1,
+     bits32 b2,
+     bits32 *z0Ptr,
+     bits32 *z1Ptr,
+     bits32 *z2Ptr
+ )
+{
+    bits32 high, mid, low;
+    int8 borrowIntoMid, borrowIntoHigh;
+
+    /* Least significant word and the borrow it requests. */
+    low = a2 - b2;
+    borrowIntoMid = ( a2 < b2 );
+
+    /* Middle word before the incoming borrow is applied. */
+    mid = a1 - b1;
+    borrowIntoHigh = ( a1 < b1 );
+
+    /* The high word must see `mid' as it is *before* the borrow is taken. */
+    high = a0 - b0 - ( mid < (bits32)borrowIntoMid ) - borrowIntoHigh;
+    mid -= borrowIntoMid;
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = high;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
+into two 32-bit pieces which are stored at the locations pointed to by
+`z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
+{
+    bits16 aHi, aLo, bHi, bLo;
+    bits32 high, low, crossA, crossB;
+
+    /* Split both operands into 16-bit halves. */
+    aLo = a;
+    aHi = a>>16;
+    bLo = b;
+    bHi = b>>16;
+
+    /* The four 16x16 partial products. */
+    low = ( (bits32) aLo ) * bLo;
+    crossA = ( (bits32) aLo ) * bHi;
+    crossB = ( (bits32) aHi ) * bLo;
+    high = ( (bits32) aHi ) * bHi;
+
+    /* Sum of the cross terms; a wrap here is worth 2^16 in the high word. */
+    crossA += crossB;
+    high += ( ( (bits32) ( crossA < crossB ) )<<16 ) + ( crossA>>16 );
+
+    /* Fold the low half of the cross sum into the low word, with carry. */
+    crossA <<= 16;
+    low += crossA;
+    high += ( low < crossA );
+
+    *z1Ptr = low;
+    *z0Ptr = high;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
+to obtain a 96-bit product. The product is broken into three 32-bit pieces
+which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+`z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul64By32To96(
+     bits32 a0,
+     bits32 a1,
+     bits32 b,
+     bits32 *z0Ptr,
+     bits32 *z1Ptr,
+     bits32 *z2Ptr
+ )
+{
+    bits32 top, middle, bottom, overlap;
+
+    /* a1 * b supplies the two least significant words. */
+    mul32To64( a1, b, &middle, &bottom );
+    /* a0 * b supplies the top word plus a part that overlaps `middle'. */
+    mul32To64( a0, b, &top, &overlap );
+    add64( top, overlap, 0, middle, &top, &middle );
+
+    *z2Ptr = bottom;
+    *z1Ptr = middle;
+    *z0Ptr = top;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
+64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
+product. The product is broken into four 32-bit pieces which are stored at
+the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+------------------------------------------------------------------------------- +*/ +INLINE void + mul64To128( + bits32 a0, + bits32 a1, + bits32 b0, + bits32 b1, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr, + bits32 *z3Ptr + ) +{ + bits32 z0, z1, z2, z3; + bits32 more1, more2; + + mul32To64( a1, b1, &z2, &z3 ); + mul32To64( a1, b0, &z1, &more2 ); + add64( z1, more2, 0, z2, &z1, &z2 ); + mul32To64( a0, b0, &z0, &more1 ); + add64( z0, more1, 0, z1, &z0, &z1 ); + mul32To64( a0, b1, &more1, &more2 ); + add64( more1, more2, 0, z2, &more1, &z2 ); + add64( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the 32-bit integer quotient obtained by dividing +`b' into the 64-bit value formed by concatenating `a0' and `a1'. The +divisor `b' must be at least 2^31. If q is the exact quotient truncated +toward zero, the approximation returned lies between q and q + 2 inclusive. +If the exact quotient q is larger than 32 bits, the maximum positive 32-bit +unsigned integer is returned. +------------------------------------------------------------------------------- +*/ +static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) +{ + bits32 b0, b1; + bits32 rem0, rem1, term0, term1; + bits32 z; + + if ( b <= a0 ) return 0xFFFFFFFF; + b0 = b>>16; + z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16; + mul32To64( b, z, &term0, &term1 ); + sub64( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits32) rem0 ) < 0 ) { + z -= 0x10000; + b1 = b<<16; + add64( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<16 ) | ( rem1>>16 ); + z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0; + return z; + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns an approximation to the square root of the 32-bit significand given +by `a'. 
Considered as an integer, `a' must be at least 2^31. If bit 0 of
+`aExp' (the least significant bit) is 1, the integer returned approximates
+2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
+is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
+case, the approximation returned lies strictly within +/-2 of the exact
+value.
+-------------------------------------------------------------------------------
+*/
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = { /* correction table used when bit 0 of aExp is 1 */
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = { /* correction table used when bit 0 of aExp is 0 */
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15; /* bits 27..30 of `a' select one of the 16 table entries */
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; /* first estimate: top bits of `a' minus the table correction */
+        z = ( ( a / z )<<14 ) + ( z<<15 ); /* refine with one division, then scale up */
+        a >>= 1; /* `a' is halved before it is used as the dividend in the final step below */
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; /* first estimate, even-exponent table */
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); /* clamp to 0xFFFF8000 when z has reached 0x20000, otherwise scale up */
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); /* early exit, skipping the division. NOTE(review): relies on >> of a negative sbits32 being an arithmetic shift, which is implementation-defined in C */
+    }
+    return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 ); /* final step: half of ( ( a:0 ) / z ) plus half of z */
+
+}
+#endif /* closes the `#ifndef SOFTFLOAT_FOR_GCC' opened before this function's header comment */
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit of
+`a'. If `a' is zero, 32 is returned.
+------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is +equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 64-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. 
+------------------------------------------------------------------------------- +*/ +INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less +than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not +equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat.c b/ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat.c new file mode 100644 index 0000000000..a513bf94e1 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat.c @@ -0,0 +1,2355 @@ +/* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */ + +/* + * This version hacked for use with gcc -msoft-float by bjh21. + * (Mostly a case of #ifdefing out things GCC doesn't need or provides + * itself). + */ + +/* + * Things you may want to define: + * + * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with + * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them + * properly renamed. + */ + +/* + * This differs from the standard bits32/softfloat.c in that float64 + * is defined to be a 64-bit integer rather than a structure. 
The + * structure is float64s, with translation between the two going via + * float64u. + */ + +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-Point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. 
+ +=============================================================================== +*/ + +#include +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif + +#include "milieu.h" +#include "softfloat.h" + +/* + * Conversions between floats as stored in memory and floats as + * SoftFloat uses them + */ +#ifndef FLOAT64_DEMANGLE +#define FLOAT64_DEMANGLE(a) (a) +#endif +#ifndef FLOAT64_MANGLE +#define FLOAT64_MANGLE(a) (a) +#endif + +/* +------------------------------------------------------------------------------- +Floating-point rounding mode and exception flags. +------------------------------------------------------------------------------- +*/ +#ifndef set_float_rounding_mode +fp_rnd float_rounding_mode = float_round_nearest_even; +fp_except float_exception_flags = 0; +#endif +#ifndef set_float_exception_inexact_flag +#define set_float_exception_inexact_flag() \ + ((void)(float_exception_flags |= float_flag_inexact)) +#endif + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) +------------------------------------------------------------------------------- +*/ +#include "softfloat-macros" + +/* +------------------------------------------------------------------------------- +Functions and definitions to determine: (1) whether tininess for underflow +is detected before or after rounding by default, (2) what (if anything) +happens when exceptions are raised, (3) how signaling NaNs are distinguished +from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs +are propagated from function inputs to output. These details are target- +specific. 
+------------------------------------------------------------------------------- +*/ +#include "softfloat-specialize" + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return a & 0x007FFFFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( a>>23 ) & 0xFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat32Sign( float32 a ) +{ + + return a>>31; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal single-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. 
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros32( aSig ) - 8;
+    *zSigPtr = aSig<>7; /* NOTE(review): truncated here. The statements that follow use zSig, roundBits, roundNearestEven, zSign and zExp, none declared in this function, and return a value from a `void' function -- presumably the rest of this function and the start of a later one were lost in extraction. Restore from upstream. */
+    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); /* clears bit 0 of zSig when roundBits == 0x40 and roundNearestEven is set */
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input. This routine is just like
+`roundAndPackFloat32' except that `zSig' does not have to be normalized.
+Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+floating-point exponent.
+-------------------------------------------------------------------------------
+*/
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros32( zSig ) - 1; /* shift needed to bring the leading 1 of zSig up to bit 30 */
+    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<> 32) & 0x000FFFFF); /* NOTE(review): truncated -- the call is cut off after `zSig<' and the remainder of the line presumably belongs to a later function lost in extraction */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat64Exp( float64 a )
+{
+
+    return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF); /* the 11 bits starting at bit 52 of the demangled value */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat64Sign( float64 a )
+{
+
+    return (flag)(FLOAT64_DEMANGLE(a) >> 63); /* bit 63 of the demangled value */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision floating-point value represented
+by the denormalized significand formed by the concatenation of `aSig0' and
+`aSig1'. The normalized exponent is stored at the location pointed to by
+`zExpPtr'. The most significant 21 bits of the normalized significand are
+stored at the location pointed to by `zSig0Ptr', and the least significant
+32 bits of the normalized significand are stored at the location pointed to
+by `zSig1Ptr'.
+
+NOTE(review): the body below is damaged. After the truncated assignment in
+the `else' branch the statements use aSigExtra, z, roundingMode and aSign
+(none declared here), return a value from a `void' function, and end with
+an `#endif' that has no matching `#if' in view -- presumably the rest of
+this function and most of a later conversion routine were lost in
+extraction. Restore from upstream before use.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat64Subnormal(
+     bits32 aSig0,
+     bits32 aSig1,
+     int16 *zExpPtr,
+     bits32 *zSig0Ptr,
+     bits32 *zSig1Ptr
+ )
+{
+    int8 shiftCount;
+
+    if ( aSig0 == 0 ) { /* high word empty: only aSig1 is used in this branch */
+        shiftCount = countLeadingZeros32( aSig1 ) - 11;
+        if ( shiftCount < 0 ) { /* fewer than 11 leading zeros: aSig1 is split across both result words */
+            *zSig0Ptr = aSig1>>( - shiftCount );
+            *zSig1Ptr = aSig1<<( shiftCount & 31 );
+        }
+        else {
+            *zSig0Ptr = aSig1<>( - shiftCount ); /* NOTE(review): truncated expression */
+        }
+        if ( aSigExtra ) set_float_exception_inexact_flag();
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            if ( (sbits32) aSigExtra < 0 ) { /* top bit of aSigExtra set */
+                ++z;
+                if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; /* no lower bits set: clear bit 0 of z */
+            }
+            if ( aSign ) z = - z;
+        }
+        else {
+            aSigExtra = ( aSigExtra != 0 );
+            if ( aSign ) {
+                z += ( roundingMode == float_round_down ) & aSigExtra;
+                z = - z;
+            }
+            else {
+                z += ( roundingMode == float_round_up ) & aSigExtra;
+            }
+        }
+    }
+    return z;
+
+}
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.
The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. +If `a' is a NaN, the largest positive integer is returned. Otherwise, if +the conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( a != 0xCF000000 ) { + float_raise( float_flag_invalid ); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + } + return (sbits32) 0x80000000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) set_float_exception_inexact_flag(); + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + set_float_exception_inexact_flag(); + } + if ( aSign ) z = - z; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+float64 float32_to_float64( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig, zSig0, zSig1;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) { /* all-ones exponent: NaN when the fraction is nonzero, otherwise infinity */
+        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
+        return packFloat64( aSign, 0x7FF, 0, 0 );
+    }
+    if ( aExp == 0 ) { /* zero exponent: signed zero, or a subnormal that is normalized first */
+        if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); /* shift the pair aSig:0 right by 3, giving the two result words */
+    return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); /* 0x380 == 0x3FF - 0x7F, the difference between the double and single exponent biases */
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC
+/*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer,
+and returns the result as a single-precision floating-point value. The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float32 z;
+
+    aExp = extractFloat32Exp( a );
+    if ( 0x96 <= aExp ) { /* 0x96 == 0x7F + 23: returned unchanged unless it is a NaN */
+        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+            return propagateFloat32NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x7E ) { /* exponent below 0x7F: magnitude is less than 1 */
+        if ( (bits32) ( a<<1 ) == 0 ) return a; /* all bits except the sign are clear: return `a' as is */
+        set_float_exception_inexact_flag();
+        aSign = extractFloat32Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+                return packFloat32( aSign, 0x7F, 0 ); /* exponent 0x7E with a nonzero fraction: result packed with exponent 0x7F */
+            }
+            break;
+         case float_round_to_zero:
+            break;
+         case float_round_down:
+            return aSign ? 0xBF800000 : 0;
+         case float_round_up:
+            return aSign ? 0x80000000 : 0x3F800000;
+        }
+        return packFloat32( aSign, 0, 0 ); /* every case that did not return above yields a zero with the sign of `a' */
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x96 - aExp; /* single bit at position ( 0x96 - aExp ) */
+    roundBitsMask = lastBitMask - 1; /* all bits below that position */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1; /* add half of lastBitMask */
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; /* no bits left below lastBitMask: clear that bit */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask; /* negative with round_down, or non-negative with round_up */
+        }
+    }
+    z &= ~ roundBitsMask; /* clear every bit below lastBitMask */
+    if ( z != a ) set_float_exception_inexact_flag();
+    return z;
+
+}
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+before being returned. `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 6;
+    bSig <<= 6;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0xFF ) {
+            if ( aSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig |= 0x20000000;
+        }
+        shift32RightJamming( bSig, expDiff, &bSig );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig |= 0x20000000;
+        }
+        shift32RightJamming( aSig, - expDiff, &aSig );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0xFF ) {
+            if ( aSig | bSig ) 
return propagateFloat32NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the single- +precision floating-point values `a' and `b'. If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto 
normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the single-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_add( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs( a, b, aSign ); + } + else { + return subFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sub( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs( a, b, aSign ); + } + else { + return addFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_mul( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + mul32To64( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits32) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the single-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_div( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv64To32( aSig, 0, bSig ); + if ( ( zSig & 0x3F ) <= 2 ) { + mul32To64( bSig, zSig, &term0, &term1 ); + sub64( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { + --zSig; + add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the remainder of the single-precision floating-point value `a' +with respect to the corresponding value 
`b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_rem( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig, q, allZero, alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | 0x00800000 )<<8; + bSig = ( bSig | 0x00800000 )<<8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 32; + while ( 0 < expDiff ) { + q = estimateDiv64To32( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 30; + } + expDiff += 32; + if ( 0 < expDiff ) { + q = estimateDiv64To32( aSig, 0, bSig ); + q = ( 2 < q ) ? 
q - 2 : 0; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + +} +#endif + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the square root of the single-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sqrt( float32 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig, rem0, rem1, term0, term1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, 0 ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0x7FFFFFFF; + goto roundAndPack; + } + else { + aSig >>= aExp & 1; + mul32To64( zSig, zSig, &term0, &term1 ); + sub64( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { + --zSig; + shortShift64Left( 0, zSig, 1, &term0, &term1 ); + term1 |= 1; + add64( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + } + shift32RightJamming( zSig, 1, &zSig ); + roundAndPack: + return roundAndPackFloat32( 0, zExp, zSig ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. 
The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_eq_signaling( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + int16 aExp, bExp; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_lt_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. 
+------------------------------------------------------------------------------- +*/ +int32 float64_to_int32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig0, aSig1, absZ, aSigExtra; + int32 z; + int8 roundingMode; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = aExp - 0x413; + if ( 0 <= shiftCount ) { + if ( 0x41E < aExp ) { + if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; + goto invalid; + } + shortShift64Left( + aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); + if ( 0x80000000 < absZ ) goto invalid; + } + else { + aSig1 = ( aSig1 != 0 ); + if ( aExp < 0x3FE ) { + aSigExtra = aExp | aSig0 | aSig1; + absZ = 0; + } + else { + aSig0 |= 0x00100000; + aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; + absZ = aSig0>>( - shiftCount ); + } + } + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( (sbits32) aSigExtra < 0 ) { + ++absZ; + if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; + } + z = aSign ? - absZ : absZ; + } + else { + aSigExtra = ( aSigExtra != 0 ); + if ( aSign ) { + z = - ( absZ + + ( ( roundingMode == float_round_down ) & aSigExtra ) ); + } + else { + z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); + } + } + if ( ( aSign ^ ( z < 0 ) ) && z ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( aSigExtra ) set_float_exception_inexact_flag(); + return z; + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. 
+If `a' is a NaN, the largest positive integer is returned. Otherwise, if +the conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig0, aSig1, absZ, aSigExtra; + int32 z; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = aExp - 0x413; + if ( 0 <= shiftCount ) { + if ( 0x41E < aExp ) { + if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; + goto invalid; + } + shortShift64Left( + aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); + } + else { + if ( aExp < 0x3FF ) { + if ( aExp | aSig0 | aSig1 ) { + set_float_exception_inexact_flag(); + } + return 0; + } + aSig0 |= 0x00100000; + aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; + absZ = aSig0>>( - shiftCount ); + } + z = aSign ? - absZ : absZ; + if ( ( aSign ^ ( z < 0 ) ) && z ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( aSigExtra ) set_float_exception_inexact_flag(); + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the single-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float64_to_float32( float64 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig0, aSig1, zSig; + bits32 allZero; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32( float64ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); + if ( aExp ) zSig |= 0x40000000; + return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Rounds the double-precision floating-point value `a' to an integer, +and returns the result as a double-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 a ) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + float64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x413 <= aExp ) { + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { + return propagateFloat64NaN( a, a ); + } + return a; + } + lastBitMask = 1; + lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( lastBitMask ) { + add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else { + if ( (sbits32) z.low < 0 ) { + ++z.high; + if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; + } + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + } + } + z.low &= ~ roundBitsMask; + } + else { + if ( aExp <= 0x3FE ) { + if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + set_float_exception_inexact_flag(); + aSign = extractFloat64Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) + ) { + return packFloat64( aSign, 0x3FF, 0, 0 ); + } + break; + case float_round_down: + return + aSign ? packFloat64( 1, 0x3FF, 0, 0 ) + : packFloat64( 0, 0, 0, 0 ); + case float_round_up: + return + aSign ? 
packFloat64( 1, 0, 0, 0 ) + : packFloat64( 0, 0x3FF, 0, 0 ); + } + return packFloat64( aSign, 0, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x413 - aExp; + roundBitsMask = lastBitMask - 1; + z.low = 0; + z.high = a.high; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.high += lastBitMask>>1; + if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { + z.high &= ~ lastBitMask; + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + z.high |= ( a.low != 0 ); + z.high += roundBitsMask; + } + } + z.high &= ~ roundBitsMask; + } + if ( ( z.low != a.low ) || ( z.high != a.high ) ) { + set_float_exception_inexact_flag(); + } + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the double-precision +floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. +The addition is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + int16 expDiff; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= 0x00100000; + } + shift64ExtraRightJamming( + bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0x7FF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= 0x00100000; + } + shift64ExtraRightJamming( + aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat64NaN( a, b ); + } + return a; + } + add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); + zSig2 = 0; + zSig0 |= 0x00200000; + zExp = aExp; + goto shiftRight1; + } + aSig0 |= 0x00100000; + add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + --zExp; + if ( zSig0 < 0x00200000 ) goto roundAndPack; + ++zExp; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the double- +precision floating-point values `a' and `b'. 
If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + int16 expDiff; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); + shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat64NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig0 < aSig0 ) goto aBigger; + if ( aSig0 < bSig0 ) goto bBigger; + if ( bSig1 < aSig1 ) goto aBigger; + if ( aSig1 < bSig1 ) goto bBigger; + return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= 0x40000000; + } + shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + bSig0 |= 0x40000000; + bBigger: + sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= 0x40000000; + } + shift64RightJamming( bSig0, 
bSig1, expDiff, &bSig0, &bSig1 ); + aSig0 |= 0x40000000; + aBigger: + sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the double-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_add( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign ); + } + else { + return subFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sub( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign ); + } + else { + return addFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. 
+
+NaN operands are propagated. An infinity multiplied by zero raises the
+invalid exception and yields the default NaN; any other product involving an
+infinity is an infinity, and any product involving a zero is a zero, each
+carrying the XOR of the operand signs. Operands with a zero exponent field
+and a nonzero fraction are normalized before the multiplication.
+-------------------------------------------------------------------------------
+*/
+float64 float64_mul( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign; /* sign of the product */
+    if ( aExp == 0x7FF ) { /* a is an infinity or a NaN */
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; /* infinity times zero */
+        return packFloat64( zSign, 0x7FF, 0, 0 );
+    }
+    if ( bExp == 0x7FF ) { /* b is an infinity or a NaN */
+        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) { /* zero times infinity */
+ invalid:
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); /* a is zero */
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); /* b is zero */
+        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    zExp = aExp + bExp - 0x400;
+    aSig0 |= 0x00100000; /* set the leading significand bit of a */
+    shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); /* b's fraction only; its leading bit is never set */
+    mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+    add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); /* presumably supplies the term for b's unset leading bit - TODO confirm */
+    zSig2 |= ( zSig3 != 0 ); /* fold the lowest product word into a sticky bit */
+    if ( 0x00200000 <= zSig0 ) { /* result reached twice the leading-bit weight: renormalize */
+        shift64ExtraRightJamming(
+            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+        ++zExp;
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision
floating-point value `a'
+by the corresponding value `b'. The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+NaN operands are propagated. Infinity divided by infinity and zero divided
+by zero raise the invalid exception and yield the default NaN. A nonzero
+finite value divided by zero raises the divide-by-zero exception and yields
+an infinity. The result sign is the XOR of the operand signs.
+-------------------------------------------------------------------------------
+*/
+float64 float64_div( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig1 = extractFloat64Frac1( b );
+    bSig0 = extractFloat64Frac0( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign; /* sign of the quotient */
+    if ( aExp == 0x7FF ) { /* a is an infinity or a NaN */
+        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
+        if ( bExp == 0x7FF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+            goto invalid; /* infinity divided by infinity */
+        }
+        return packFloat64( zSign, 0x7FF, 0, 0 );
+    }
+    if ( bExp == 0x7FF ) { /* b is an infinity or a NaN, a is finite */
+        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign, 0, 0, 0 ); /* finite divided by infinity: zero */
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) { /* b is zero */
+            if ( ( aExp | aSig0 | aSig1 ) == 0 ) { /* zero divided by zero */
+ invalid:
+                float_raise( float_flag_invalid );
+                return float64_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat64( zSign, 0x7FF, 0, 0 );
+        }
+        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); /* a is zero */
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = aExp - bExp + 0x3FD;
+    shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); /* set the leading bit, then move it to bit 31 of the high word */
+    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
+    if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { /* presumably bSig <= aSig: halve aSig and raise zExp */
+        shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); /* first quotient word (an estimate, corrected below) */
+    mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+    sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+    while ( (sbits32) rem0 < 0 ) { /* remainder negative: step the quotient word down */
+        --zSig0;
+        add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+    }
+    zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); /* second quotient word (an estimate) */
+    if ( ( zSig1 & 0x3FF ) <= 4 ) { /* only when the low 10 bits are <= 4 is the estimate corrected */
+        mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+        sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits32) rem1 < 0 ) {
+            --zSig1;
+            add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); /* nonzero remainder sets the lowest bit */
+    }
+    shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+#ifndef SOFTFLOAT_FOR_GCC
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'. The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+------------------------------------------------------------------------------- +*/ +float64 float64_rem( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; + bits32 allZero, alternateASig0, alternateASig1, sigMean1; + sbits32 sigMean0; + float64 z; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + if ( aExp == 0x7FF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat64NaN( a, b ); + } + goto invalid; + } + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; + normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; + shortShift64Left( + aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); + shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); + q = le64( bSig0, bSig1, aSig0, aSig1 ); + if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 32; + while ( 0 < expDiff ) { + q = estimateDiv64To32( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? 
q - 4 : 0; + mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); + shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); + sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 29; + } + if ( -32 < expDiff ) { + q = estimateDiv64To32( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); + expDiff += 24; + if ( expDiff < 0 ) { + shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); + shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); + } + do { + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (sbits32) aSig0 ); + add64( + aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (sbits32) aSig0 < 0 ); + if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); + return + normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + +} +#endif + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the square root of the double-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+
+A NaN operand is propagated. Positive infinity and zeros of either sign are
+returned unchanged. Any other negative operand raises the invalid exception
+and yields the default NaN.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sqrt( float64 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+
+    aSig1 = extractFloat64Frac1( a );
+    aSig0 = extractFloat64Frac0( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
+        if ( ! aSign ) return a; /* +infinity */
+        goto invalid; /* -infinity */
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; /* negative zero */
+ invalid:
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
+        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+    aSig0 |= 0x00100000; /* set the leading significand bit */
+    shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
+    zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
+    if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; /* the `+ 1' wrapped around to zero */
+    doubleZSig0 = zSig0 + zSig0;
+    shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
+    mul32To64( zSig0, zSig0, &term0, &term1 );
+    sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    /* Step the first root word down until the remainder is non-negative. */
+    while ( (sbits32) rem0 < 0 ) {
+        --zSig0;
+        doubleZSig0 -= 2;
+        add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
+    /* Only when the low 9 bits are <= 5 is the second word corrected and */
+    /* the remainder folded into its lowest bit. */
+    if ( ( zSig1 & 0x1FF ) <= 5 ) {
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul32To64( doubleZSig0, zSig1, &term1, &term2 );
+        sub64( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul32To64( zSig1, zSig1, &term2, &term3 );
+        sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits32) rem1 < 0 ) {
+            --zSig1;
+            shortShift64Left( 0, zSig1, 1, &term2, &term3 );
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
+
+}
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise. The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+
+If either operand is a NaN the result is 0, and the invalid exception is
+raised only when one of the operands is a signaling NaN. Two zeros compare
+equal whatever their signs.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq( float64 a, float64 b )
+{
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF )
+              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF )
+              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    /* The shift by one drops the sign bit, so the second test is true */
+    /* exactly when both operands are zeros. */
+    return ( a == b ) ||
+        ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise. The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+------------------------------------------------------------------------------- +*/ +flag float64_le( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) + return aSign || + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == + 0 ); + return ( a == b ) || + ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) + return aSign && + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != + 0 ); + return ( a != b ) && + ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. 
The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq_signaling( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. 
Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#endif diff --git a/ArmPkg/Library/ArmSoftFloatLib/milieu.h b/ArmPkg/Library/ArmSoftFloatLib/milieu.h new file mode 100644 index 0000000000..8f4ac00076 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/milieu.h @@ -0,0 +1,38 @@ +/* $NetBSD: milieu.h,v 1.1 2000/12/29 20:13:54 bjh21 Exp $ */ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. +------------------------------------------------------------------------------- +*/ +#include "arm-gcc.h" diff --git a/ArmPkg/Library/ArmSoftFloatLib/softfloat-for-gcc.h b/ArmPkg/Library/ArmSoftFloatLib/softfloat-for-gcc.h new file mode 100644 index 0000000000..420cecc298 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/softfloat-for-gcc.h @@ -0,0 +1,242 @@ +/* $NetBSD: softfloat-for-gcc.h,v 1.12 2013/08/01 23:21:19 matt Exp $ */ +/*- + * Copyright (c) 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Move private identifiers with external linkage into implementation + * namespace. -- Klaus Klein , May 5, 1999 + */ +#define float_exception_flags _softfloat_float_exception_flags +#define float_exception_mask _softfloat_float_exception_mask +#define float_rounding_mode _softfloat_float_rounding_mode +#define float_raise _softfloat_float_raise + +/* The following batch are called by GCC through wrappers */ +#define float32_eq _softfloat_float32_eq +#define float32_le _softfloat_float32_le +#define float32_lt _softfloat_float32_lt +#define float64_eq _softfloat_float64_eq +#define float64_le _softfloat_float64_le +#define float64_lt _softfloat_float64_lt +#define float128_eq _softfloat_float128_eq +#define float128_le _softfloat_float128_le +#define float128_lt _softfloat_float128_lt + +/* + * Macros to define functions with the GCC expected names + */ + +#define float32_add __addsf3 +#define float64_add __adddf3 +#define floatx80_add __addxf3 +#define float128_add __addtf3 + +#define float32_sub __subsf3 +#define float64_sub __subdf3 +#define floatx80_sub __subxf3 +#define float128_sub __subtf3 + +#define float32_mul __mulsf3 +#define float64_mul __muldf3 +#define floatx80_mul __mulxf3 +#define float128_mul __multf3 + 
+#define float32_div __divsf3 +#define float64_div __divdf3 +#define floatx80_div __divxf3 +#define float128_div __divtf3 + +#if 0 +#define float32_neg __negsf2 +#define float64_neg __negdf2 +#define floatx80_neg __negxf2 +#define float128_neg __negtf2 +#endif + +#define int32_to_float32 __floatsisf +#define int32_to_float64 __floatsidf +#define int32_to_floatx80 __floatsixf +#define int32_to_float128 __floatsitf + +#define int64_to_float32 __floatdisf +#define int64_to_float64 __floatdidf +#define int64_to_floatx80 __floatdixf +#define int64_to_float128 __floatditf + +#define int128_to_float32 __floattisf +#define int128_to_float64 __floattidf +#define int128_to_floatx80 __floattixf +#define int128_to_float128 __floattitf + +#define uint32_to_float32 __floatunsisf +#define uint32_to_float64 __floatunsidf +#define uint32_to_floatx80 __floatunsixf +#define uint32_to_float128 __floatunsitf + +#define uint64_to_float32 __floatundisf +#define uint64_to_float64 __floatundidf +#define uint64_to_floatx80 __floatundixf +#define uint64_to_float128 __floatunditf + +#define uint128_to_float32 __floatuntisf +#define uint128_to_float64 __floatuntidf +#define uint128_to_floatx80 __floatuntixf +#define uint128_to_float128 __floatuntitf + +#define float32_to_int32_round_to_zero __fixsfsi +#define float64_to_int32_round_to_zero __fixdfsi +#define floatx80_to_int32_round_to_zero __fixxfsi +#define float128_to_int32_round_to_zero __fixtfsi + +#define float32_to_int64_round_to_zero __fixsfdi +#define float64_to_int64_round_to_zero __fixdfdi +#define floatx80_to_int64_round_to_zero __fixxfdi +#define float128_to_int64_round_to_zero __fixtfdi + +#define float32_to_int128_round_to_zero __fixsfti +#define float64_to_int128_round_to_zero __fixdfti +#define floatx80_to_int128_round_to_zero __fixxfti +#define float128_to_int128_round_to_zero __fixtfti + +#define float32_to_uint32_round_to_zero __fixunssfsi +#define float64_to_uint32_round_to_zero __fixunsdfsi +#define 
floatx80_to_uint32_round_to_zero __fixunsxfsi +#define float128_to_uint32_round_to_zero __fixunstfsi + +#define float32_to_uint64_round_to_zero __fixunssfdi +#define float64_to_uint64_round_to_zero __fixunsdfdi +#define floatx80_to_uint64_round_to_zero __fixunsxfdi +#define float128_to_uint64_round_to_zero __fixunstfdi + +#define float32_to_uint128_round_to_zero __fixunssfti +#define float64_to_uint128_round_to_zero __fixunsdfti +#define floatx80_to_uint128_round_to_zero __fixunsxfti +#define float128_to_uint128_round_to_zero __fixunstfti + +#define float32_to_float64 __extendsfdf2 +#define float32_to_floatx80 __extendsfxf2 +#define float32_to_float128 __extendsftf2 +#define float64_to_floatx80 __extenddfxf2 +#define float64_to_float128 __extenddftf2 + +#define float128_to_float64 __trunctfdf2 +#define floatx80_to_float64 __truncxfdf2 +#define float128_to_float32 __trunctfsf2 +#define floatx80_to_float32 __truncxfsf2 +#define float64_to_float32 __truncdfsf2 + +#if 0 +#define float32_cmp __cmpsf2 +#define float32_unord __unordsf2 +#define float32_eq __eqsf2 +#define float32_ne __nesf2 +#define float32_ge __gesf2 +#define float32_lt __ltsf2 +#define float32_le __lesf2 +#define float32_gt __gtsf2 +#endif + +#if 0 +#define float64_cmp __cmpdf2 +#define float64_unord __unorddf2 +#define float64_eq __eqdf2 +#define float64_ne __nedf2 +#define float64_ge __gedf2 +#define float64_lt __ltdf2 +#define float64_le __ledf2 +#define float64_gt __gtdf2 +#endif + +/* XXX not in libgcc */ +#if 1 +#define floatx80_cmp __cmpxf2 +#define floatx80_unord __unordxf2 +#define floatx80_eq __eqxf2 +#define floatx80_ne __nexf2 +#define floatx80_ge __gexf2 +#define floatx80_lt __ltxf2 +#define floatx80_le __lexf2 +#define floatx80_gt __gtxf2 +#endif + +#if 0 +#define float128_cmp __cmptf2 +#define float128_unord __unordtf2 +#define float128_eq __eqtf2 +#define float128_ne __netf2 +#define float128_ge __getf2 +#define float128_lt __lttf2 +#define float128_le __letf2 +#define float128_gt 
__gttf2 +#endif + +#ifdef __ARM_EABI__ +#ifdef __ARM_PCS_VFP +#include +#endif +#define __addsf3 __aeabi_fadd +#define __adddf3 __aeabi_dadd + +#define __subsf3 __aeabi_fsub +#define __subdf3 __aeabi_dsub + +#define __mulsf3 __aeabi_fmul +#define __muldf3 __aeabi_dmul + +#define __divsf3 __aeabi_fdiv +#define __divdf3 __aeabi_ddiv + +#define __floatsisf __aeabi_i2f +#define __floatsidf __aeabi_i2d + +#define __floatdisf __aeabi_l2f +#define __floatdidf __aeabi_l2d + +#define __floatunsisf __aeabi_ui2f +#define __floatunsidf __aeabi_ui2d + +#define __floatundisf __aeabi_ul2f +#define __floatundidf __aeabi_ul2d + +#define __fixsfsi __aeabi_f2iz +#define __fixdfsi __aeabi_d2iz + +#define __fixsfdi __aeabi_f2lz +#define __fixdfdi __aeabi_d2lz + +#define __fixunssfsi __aeabi_f2uiz +#define __fixunsdfsi __aeabi_d2uiz + +#define __fixunssfdi __aeabi_f2ulz +#define __fixunsdfdi __aeabi_d2ulz + +#define __extendsfdf2 __aeabi_f2d +#define __truncdfsf2 __aeabi_d2f + +#endif /* __ARM_EABI__ */ diff --git a/ArmPkg/Library/ArmSoftFloatLib/softfloat-specialize b/ArmPkg/Library/ArmSoftFloatLib/softfloat-specialize new file mode 100644 index 0000000000..13ada988d1 --- /dev/null +++ b/ArmPkg/Library/ArmSoftFloatLib/softfloat-specialize @@ -0,0 +1,529 @@ +/* $NetBSD: softfloat-specialize,v 1.8 2013/01/10 08:16:10 matt Exp $ */ + +/* This is a derivative work. */ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. 
The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include +#include +#include + +/* +------------------------------------------------------------------------------- +Underflow tininess-detection mode, statically initialized to default value. +(The declaration in `softfloat.h' must match the `int8' type here.) +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +int8 float_detect_tininess = float_tininess_after_rounding; + +/* +------------------------------------------------------------------------------- +Raises the exceptions specified by `flags'. Floating-point traps can be +defined here if desired. It is currently not possible for such a trap to +substitute a result value. If traps are not implemented, this routine +should be simply `float_exception_flags |= flags;'. 
+
+In this file the signal-delivery code below is compiled out with `#if 0', so
+float_raise() only ORs `flags' into `float_exception_flags'.
+-------------------------------------------------------------------------------
+*/
+#ifdef SOFTFLOAT_FOR_GCC
+#ifndef set_float_exception_mask
+#define float_exception_mask _softfloat_float_exception_mask
+#endif
+#endif
+#ifndef set_float_exception_mask
+fp_except float_exception_mask = 0; /* read only by the disabled `#if 0' branch of float_raise() below */
+#endif
+void
+float_raise( fp_except flags )
+{
+
+#if 0 /* disabled: signal-delivering variant, never compiled */
+    siginfo_t info;
+    fp_except mask = float_exception_mask;
+
+#ifdef set_float_exception_mask
+    flags |= set_float_exception_flags(flags, 0);
+#else
+    float_exception_flags |= flags;
+    flags = float_exception_flags;
+#endif
+
+    flags &= mask;
+    if ( flags ) {
+        memset(&info, 0, sizeof info);
+        info.si_signo = SIGFPE;
+        info.si_pid = getpid();
+        info.si_uid = geteuid();
+        if (flags & float_flag_underflow)
+            info.si_code = FPE_FLTUND;
+        else if (flags & float_flag_overflow)
+            info.si_code = FPE_FLTOVF;
+        else if (flags & float_flag_divbyzero)
+            info.si_code = FPE_FLTDIV;
+        else if (flags & float_flag_invalid)
+            info.si_code = FPE_FLTINV;
+        else if (flags & float_flag_inexact)
+            info.si_code = FPE_FLTRES;
+        sigqueueinfo(getpid(), &info);
+    }
+#else /* active variant: accumulate the flags, deliver nothing */
+    float_exception_flags |= flags;
+#endif /* 0 */
+}
+#undef float_exception_mask
+
+/*
+-------------------------------------------------------------------------------
+Internal canonical NaN format.
+-------------------------------------------------------------------------------
+*/
+typedef struct {
+    flag sign; /* sign bit of the NaN */
+    bits64 high, low; /* NaN bits; float32ToCommonNaN() stores `a<<41' in `high' and 0 in `low' */
+} commonNaNT;
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated single-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +flag float32_is_nan( float32 a ) +{ + + return ( (bits32)0xFF000000 < (bits32) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC) && \ + !defined(SOFTFLOAT_M68K_FOR_GCC) +static +#endif +flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float32ToCommonNaN( float32 a ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>31; + z.low = 0; + z.high = ( (bits64) a )<<41; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the single- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | (bits32)( a.high>>41 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two single-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. 
If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    /* Classify both operands first; they are modified just below. */
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    /* Set fraction bit 22 in both values, so whichever one is returned no
+       longer matches the signaling-NaN pattern. */
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    /* Selection: `b' is returned when `a' is not a NaN, or when `a' was a
+       signaling NaN and `b' is a NaN; in every other case `a' is returned. */
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated double-precision NaN.
+(All 64 bits set.)
+-------------------------------------------------------------------------------
+*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+/* Internal linkage (static) when built with SOFTFLOAT_FOR_GCC. */
+#ifdef SOFTFLOAT_FOR_GCC
+static
+#endif
+flag float64_is_nan( float64 a )
+{
+
+    /* FLOAT64_DEMANGLE is defined outside this excerpt; presumably it yields
+       the plain 64-bit encoding of `a' -- TODO confirm.  The left shift
+       pushes the sign bit out; a result above 0xFFE0000000000000 has all
+       eleven exponent bits set and a non-zero fraction. */
+    return ( (bits64)LIT64( 0xFFE0000000000000 ) <
+             (bits64) ( FLOAT64_DEMANGLE(a)<<1 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+/* NOTE(review): this guard tests SOFTFLOATM68K_FOR_GCC, whereas the
+   equivalent guard on float32_is_signaling_nan tests SOFTFLOAT_M68K_FOR_GCC
+   (underscore before M68K) -- confirm which spelling the build defines. */
+#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC) && \
+    !defined(SOFTFLOATM68K_FOR_GCC)
+static
+#endif
+flag float64_is_signaling_nan( float64 a )
+{
+
+    /* 0xFFE in bits 62..51 means exponent (bits 62..52) all ones with bit 51,
+       the most-significant fraction bit, clear; the second operand requires
+       at least one of bits 50..0 to be set. */
+    return
+           ( ( ( FLOAT64_DEMANGLE(a)>>51 ) & 0xFFF ) == 0xFFE )
+        && ( FLOAT64_DEMANGLE(a) & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point NaN
+`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+exception is raised.
+The result has `sign' taken from bit 63, `low' set to 0, and the fraction
+left-justified in `high' (bit 51 of the encoding becomes bit 63).
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = (flag)(FLOAT64_DEMANGLE(a)>>63);
+    z.low = 0;
+    /* The shift by 12 discards the sign and exponent bits. */
+    z.high = FLOAT64_DEMANGLE(a)<<12;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the double-
+precision floating-point format.
+The value built always has the exponent bits and fraction bit 51 set
+(0x7FF8000000000000) before it is passed through FLOAT64_MANGLE.  Only
+`a.sign' and `a.high' are used; `a.low' is ignored.
+-------------------------------------------------------------------------------
+*/
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return FLOAT64_MANGLE(
+        ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two double-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    /* Classify both operands first; they are modified just below. */
+    aIsNaN = float64_is_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsNaN = float64_is_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    /* OR in the (mangled) fraction bit 51 on both values, so whichever one
+       is returned no longer matches the signaling-NaN pattern. */
+    a |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 ));
+    b |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 ));
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    /* Selection: `b' is returned when `a' is not a NaN, or when `a' was a
+       signaling NaN and `b' is a NaN; in every other case `a' is returned. */
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+/* Everything from here to the matching #endif is compiled only when FLOATX80
+   is defined. */
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated extended double-precision NaN. The
+`high' and `low' values hold the most- and least-significant bits,
+respectively.
+-------------------------------------------------------------------------------
+*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_nan( floatx80 a )
+{
+
+    /* The low 15 bits of `high' (exponent) must all be ones.  a.low<<1
+       discards bit 63 of `low', so the second operand is non-zero only when
+       at least one of bits 62..0 is set. */
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 aLow;
+
+    /* aLow is `low' with bit 62 forced clear. */
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    /* Three conditions: exponent bits all ones; at least one of bits 61..0
+       of `low' set (aLow<<1 drops bit 63); and bit 62 of `low' already clear
+       (a.low == aLow). */
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
+invalid exception is raised.
+The result has `sign' taken from bit 15 of `a.high', `low' set to 0, and
+`high' set to `a.low' shifted left by one (bit 63 of `a.low' is dropped).
+-------------------------------------------------------------------------------
+*/
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the extended
+double-precision floating-point format.
+The result always has bits 63 and 62 of `low' set and the low 15 bits of
+`high' all ones, so it never matches the pattern tested by
+floatx80_is_signaling_nan.  Only `a.sign' and `a.high' are used; `a.low' is
+ignored.
+-------------------------------------------------------------------------------
+*/
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two extended double-precision floating-point values `a' and `b', one
+of which is a NaN, and returns the appropriate NaN result. If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    /* Classify both operands first; they are modified just below. */
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    /* Set bits 63 and 62 of `low' in both values, so whichever one is
+       returned no longer matches the signaling-NaN pattern. */
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    /* Selection: `b' is returned when `a' is not a NaN, or when `a' was a
+       signaling NaN and `b' is a NaN; in every other case `a' is returned. */
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
+/* Everything from here to the matching #endif is compiled only when FLOAT128
+   is defined. */
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated quadruple-precision NaN. The `high' and
+`low' values hold the most- and least-significant bits, respectively.
+-------------------------------------------------------------------------------
+*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
+#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float128_is_nan( float128 a )
+{
+
+    /* a.high<<1 pushes the sign bit out; a result of at least
+       0xFFFE000000000000 has all fifteen exponent bits (62..48 of `high')
+       set.  The second operand requires a non-zero fraction: any bit of
+       `low', or any of bits 47..0 of `high'. */
+    return
+           ( (bits64)LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float128_is_signaling_nan( float128 a )
+{
+
+    /* 0xFFFE in bits 62..47 of `high' means exponent (bits 62..48) all ones
+       with bit 47, the most-significant fraction bit, clear.  The second
+       operand requires some other fraction bit to be set: any bit of `low',
+       or any of bits 46..0 of `high'. */
+    return
+           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point NaN
+`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT z;
+
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = (flag)(a.high>>63);
+    /* shortShift128Left is defined outside this excerpt; presumably it
+       shifts the 128-bit pair (a.high, a.low) left by 16 into
+       (z.high, z.low), dropping the sign and exponent -- TODO confirm. */
+    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the quadruple-
+precision floating-point format.
+After the fraction is moved back down, `high' is OR-ed with the sign bit and
+0x7FFF800000000000 (exponent bits 62..48 and fraction bit 47), so the result
+never matches the pattern tested by float128_is_signaling_nan.
+-------------------------------------------------------------------------------
+*/
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 z;
+
+    /* shift128Right is defined outside this excerpt; presumably it shifts
+       (a.high, a.low) right by 16 into (z.high, z.low) -- TODO confirm. */
+    shift128Right( a.high, a.low, 16, &z.high, &z.low );
+    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two quadruple-precision floating-point values `a' and `b', one of
+which is a NaN, and returns the appropriate NaN result. If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    /* Classify both operands first; they are modified just below. */
+    aIsNaN = float128_is_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsNaN = float128_is_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+    /* Set bit 47 of `high' (the most-significant fraction bit) in both
+       values, so whichever one is returned no longer matches the
+       signaling-NaN pattern. */
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    /* Selection: `b' is returned when `a' is not a NaN, or when `a' was a
+       signaling NaN and `b' is a NaN; in every other case `a' is returned. */
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+