-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[libc][math][c23] Add hypotf16 function #131991
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@overmighty @lntue requesting review. |
@llvm/pr-subscribers-libc Author: Tejas Vipin (meltq) Changes: Implement hypot for Float16 along with tests. Patch is 44.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131991.diff 15 Files Affected:
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index a29478898fe70..f2a936962dd6d 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -701,6 +701,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.fromfpf16
libc.src.math.fromfpxf16
libc.src.math.getpayloadf16
+ libc.src.math.hypotf16
libc.src.math.ilogbf16
libc.src.math.iscanonicalf16
libc.src.math.issignalingf16
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index 5b855ce4881c3..ff7fd32690b3c 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -305,7 +305,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fsqrt | N/A | |check| | |check| | N/A | |check|\* | 7.12.14.6 | F.10.11 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| hypot | |check| | |check| | | | | 7.12.7.4 | F.10.4.4 |
+| hypot | |check| | |check| | | |check| | | 7.12.7.4 | F.10.4.4 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| lgamma | | | | | | 7.12.8.3 | F.10.5.3 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
index a66f981030864..b0bb5d74c5605 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -1366,6 +1366,14 @@ functions:
arguments:
- type: float
- type: float
+ - name: hypotf16
+ standards:
+ - stdc
+ return_type: _Float16
+ arguments:
+ - type: _Float16
+ - type: _Float16
+ guard: LIBC_TYPES_HAS_FLOAT16
- name: ilogb
standards:
- stdc
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index f18a73d46f9aa..3a098951bb205 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -310,6 +310,7 @@ add_math_entrypoint_object(getpayloadf128)
add_math_entrypoint_object(hypot)
add_math_entrypoint_object(hypotf)
+add_math_entrypoint_object(hypotf16)
add_math_entrypoint_object(ilogb)
add_math_entrypoint_object(ilogbf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 3114289bad486..e980d103a1d22 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3105,6 +3105,22 @@ add_entrypoint_object(
libc.src.__support.macros.optimization
)
+add_entrypoint_object(
+ hypotf16
+ SRCS
+ hypotf16.cpp
+ HDRS
+ ../hypotf16.h
+ DEPENDS
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.types
+)
+
add_entrypoint_object(
fdim
SRCS
diff --git a/libc/src/math/generic/hypotf16.cpp b/libc/src/math/generic/hypotf16.cpp
new file mode 100644
index 0000000000000..b7255bc4420db
--- /dev/null
+++ b/libc/src/math/generic/hypotf16.cpp
@@ -0,0 +1,86 @@
+//===-- Implementation of hypotf16 function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/hypotf16.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float16, hypotf16, (float16 x, float16 y)) { // sqrt(x^2 + y^2): evaluated in float, then rounded to float16
+ using FloatBits = fputil::FPBits<float>;
+ using FPBits = fputil::FPBits<float16>;
+
+ FPBits x_abs = FPBits(x).abs();
+ FPBits y_abs = FPBits(y).abs();
+
+ bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
+
+ FPBits a_bits = x_abs_larger ? x_abs : y_abs; // |operand| with the larger bit pattern
+ FPBits b_bits = x_abs_larger ? y_abs : x_abs; // |operand| with the smaller bit pattern
+
+ uint16_t a_u = a_bits.uintval();
+ uint16_t b_u = b_bits.uintval();
+
+ // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
+ // generates extra exponent bit masking instructions on x86-64.
+ if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
+ // At least one of x, y is inf or NaN; a_bits holds the larger bit pattern.
+ if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ if (a_bits.is_inf() || b_bits.is_inf())
+ return FPBits::inf().get_val(); // an infinite operand wins over a quiet NaN
+ return a_bits.get_val(); // neither inf nor signaling NaN: a_bits is a quiet NaN
+ }
+
+ if (LIBC_UNLIKELY(a_u - b_u >=
+ static_cast<uint16_t>((FPBits::FRACTION_LEN + 2)
+ << FPBits::FRACTION_LEN)))
+ return x_abs.get_val() + y_abs.get_val(); // NOTE(review): presumably b is too small to matter beyond what this sum rounds to - confirm
+
+ float ad = fputil::cast<float>(a_bits.get_val());
+ float bd = fputil::cast<float>(b_bits.get_val());
+
+ // The squares of the widened float16 inputs are exact in float; bd * bd is formed inside multiply_add.
+ float a_sq = ad * ad;
+ float sum_sq = fputil::multiply_add(bd, bd, a_sq);
+
+ FloatBits result(fputil::sqrt<float>(sum_sq));
+ uint32_t r_u = result.uintval();
+
+ // Fast path: if any of bits 1..11 of (r_u + 1) is set, the sticky bits (LSB aside)
+ // are non-zero and the final cast already rounds correctly; otherwise correct below.
+ if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0FFE) == 0)) {
+ float r_d = result.get_val();
+
+ // Perform rounding correction using err, an estimate of ad^2 + bd^2 - r_d^2.
+ float sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq); // (a_sq + bd * bd) - sum_sq
+ float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq); // sum_sq_lo - (r_d^2 - sum_sq)
+
+ if (err > 0) {
+ r_u |= 1; // r_d is below the true root: force the lowest bit on
+ } else if ((err < 0) && (r_u & 1) == 0) {
+ r_u -= 1; // r_d is above the true root and its lowest bit is clear: step down one ulp
+ } else if ((r_u & 0x0000'1FFF) == 0) {
+ // The low 13 bits are all zero, so the rounded result is exact.
+ fputil::clear_except_if_required(FE_INEXACT);
+ }
+ return fputil::cast<float16>(FloatBits(r_u).get_val());
+ }
+
+ return fputil::cast<float16>(result.get_val());
+}
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/hypotf16.h b/libc/src/math/hypotf16.h
new file mode 100644
index 0000000000000..2d37c61b4ee7b
--- /dev/null
+++ b/libc/src/math/hypotf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for hypotf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_HYPOTF16_H
+#define LLVM_LIBC_SRC_MATH_HYPOTF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 hypotf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_HYPOTF16_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 53ddd301900c0..6daaacac99345 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1701,6 +1701,17 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ hypotf16_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ hypotf16_test.cpp
+ DEPENDS
+ libc.src.math.hypotf16
+)
+
add_fp_unittest(
nextafter_test
SUITE
diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h
index fd0c1b394b8f7..dc73581e67ff0 100644
--- a/libc/test/src/math/HypotTest.h
+++ b/libc/test/src/math/HypotTest.h
@@ -73,7 +73,7 @@ class HypotTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
constexpr StorageType COUNT = 10'001;
for (unsigned scale = 0; scale < 4; ++scale) {
StorageType max_value = MAX_SUBNORMAL << scale;
- StorageType step = (max_value - MIN_SUBNORMAL) / COUNT;
+ StorageType step = (max_value - MIN_SUBNORMAL) / COUNT + 1;
for (int signs = 0; signs < 4; ++signs) {
for (StorageType v = MIN_SUBNORMAL, w = max_value;
v <= max_value && w >= MIN_SUBNORMAL; v += step, w -= step) {
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index b1927dbc19a3b..551f449c9c8db 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -314,6 +314,24 @@ add_fp_unittest(
-lpthread
)
+add_fp_unittest(
+ hypotf16_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ hypotf16_test.cpp
+ COMPILE_OPTIONS
+ ${libc_opt_high_flag}
+ DEPENDS
+ .exhaustive_test
+ libc.src.math.hypotf16
+ libc.src.__support.FPUtil.fp_bits
+ LINK_LIBRARIES
+ -lpthread
+)
+
add_fp_unittest(
fmod_generic_impl_test
NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/hypotf16_test.cpp b/libc/test/src/math/exhaustive/hypotf16_test.cpp
new file mode 100644
index 0000000000000..3ba752bd81f24
--- /dev/null
+++ b/libc/test/src/math/exhaustive/hypotf16_test.cpp
@@ -0,0 +1,58 @@
+//===-- Exhaustive test for hypotf16 --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/Hypot.h"
+#include "src/math/hypotf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range of both inputs: [0, inf]
+static constexpr uint16_t START = 0x0000U;
+static constexpr uint16_t STOP = 0x7C00U;
+
+struct Hypotf16Checker : public virtual LIBC_NAMESPACE::testing::Test {
+ using FloatType = float16;
+ using FPBits = LIBC_NAMESPACE::fputil::FPBits<float16>;
+ using StorageType = typename FPBits::StorageType;
+
+ uint64_t check(uint16_t start, uint16_t stop, mpfr::RoundingMode rounding) { // returns the number of mismatching (x, y) pairs
+ mpfr::ForceRoundingMode r(rounding);
+ if (!r.success)
+ return true; // converts to 1 in the uint64_t return; NOTE(review): confirm the caller expects this when the mode is not applied
+ uint16_t xbits = start;
+ uint64_t failed = 0;
+ do {
+ float16 x = FPBits(xbits).get_val();
+ uint16_t ybits = xbits; // y starts at x, so only pairs with ybits >= xbits are visited
+ do {
+ float16 y = FPBits(ybits).get_val();
+ bool correct = TEST_FP_EQ(LIBC_NAMESPACE::fputil::hypot(x, y),
+ LIBC_NAMESPACE::hypotf16(x, y));
+ // The reference above is the generic fputil::hypot; the MPFR check below is disabled because it is much slower.
+ // mpfr::BinaryInput<float16> input{x, y};
+ // bool correct = TEST_MPFR_MATCH_ROUNDING_SILENTLY(
+ // mpfr::Operation::Hypot, input, LIBC_NAMESPACE::hypotf16(x, y),
+ // 0.5,
+ // rounding);
+ failed += (!correct);
+ } while (ybits++ < STOP); // inner bound is the file-level STOP (inclusive), not the `stop` argument
+ } while (xbits++ < stop); // outer bound `stop` is inclusive
+ return failed;
+ }
+};
+
+using LlvmLibcHypotf16ExhaustiveTest =
+ LlvmLibcExhaustiveMathTest<Hypotf16Checker>;
+
+TEST_F(LlvmLibcHypotf16ExhaustiveTest, PositiveRange) {
+ test_full_range_all_roundings(START, STOP); // bit patterns 0x0000 through 0x7C00
+}
diff --git a/libc/test/src/math/hypotf16_hard_to_round.h b/libc/test/src/math/hypotf16_hard_to_round.h
new file mode 100644
index 0000000000000..15e0dc13adbf3
--- /dev/null
+++ b/libc/test/src/math/hypotf16_hard_to_round.h
@@ -0,0 +1,522 @@
+//===-- Hard-to-round inputs for hypotf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H
+#define LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H
+
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// 1000 cases where the precise result is between two consecutive
+// Float16 numbers
+constexpr int N_HARD_TO_ROUND = 1000;
+constexpr mpfr::BinaryInput<float16> HYPOTF16_HARD_TO_ROUND[N_HARD_TO_ROUND] = {
+ {0x1.3b8p-5, 0x1.d94p-6}, {0x1.068p-7, 0x1.1ccp-8},
+ {0x1.1dp-3, 0x1.4c8p-5}, {0x1.c38p-2, 0x1.784p-3},
+ {0x1.e3cp-3, 0x1.02p-3}, {0x1.04p-8, 0x1.b3p-11},
+ {0x1.e3p+5, 0x1.928p+4}, {0x1.8bp+4, 0x1.638p+2},
+ {0x1.4fp+1, 0x1.2d8p-1}, {0x1.458p+3, 0x1.0f4p+2},
+ {0x1.c08p+1, 0x1.75cp+0}, {0x1.1fp-13, 0x1.524p-14},
+ {0x1.36p-3, 0x1.2e4p-4}, {0x1.b58p-9, 0x1.3ecp-10},
+ {0x1.d74p+6, 0x1.a7p+6}, {0x1.dc4p-6, 0x1.fcp-7},
+ {0x1.6f8p-3, 0x1.324p-4}, {0x1.86p+13, 0x1.7c4p+12},
+ {0x1.9c8p+4, 0x1.57cp+3}, {0x1.81p+7, 0x1.638p+5},
+ {0x1.488p+9, 0x1.11cp+8}, {0x1.0a8p+12, 0x1.f68p+10},
+ {0x1.008p-8, 0x1.bcp-13}, {0x1.818p-8, 0x1.414p-9},
+ {0x1.efp+1, 0x1.814p+1}, {0x1.b6cp+6, 0x1.11p+6},
+ {0x1.458p+10, 0x1.93p+7}, {0x1.058p+13, 0x1.884p+12},
+ {0x1.ae8p+7, 0x1.0a8p+5}, {0x1.9c8p+7, 0x1.60cp+6},
+ {0x1.2cp+2, 0x1.14cp+1}, {0x1.9bp+13, 0x1.df8p+11},
+ {0x1.b2p+14, 0x1.d1p+11}, {0x1.f8p-10, 0x1.d1cp-10},
+ {0x1.22p-13, 0x1.7b8p-15}, {0x1.cep-13, 0x1.104p-13},
+ {0x1.2e8p+5, 0x1.cep+1}, {0x1.edp-10, 0x1.71cp-10},
+ {0x1.6f8p+12, 0x1.96p+8}, {0x1.e78p+7, 0x1.32p+3},
+ {0x1.2dp-1, 0x1.62cp-2}, {0x1.ab8p-4, 0x1.72p-8},
+ {0x1.ab8p+7, 0x1.398p+5}, {0x1.97p-10, 0x1.1b8p-12},
+ {0x1.eap+5, 0x1.068p+3}, {0x1.fdp+11, 0x1.7dcp+11},
+ {0x1.fbp-14, 0x1.b04p-14}, {0x1.3bp+14, 0x1.6f8p+12},
+ {0x1.4c8p+5, 0x1.c8cp+4}, {0x1.848p-3, 0x1.e1p-6},
+ {0x1.72p+4, 0x1.68cp+3}, {0x1.2a8p-10, 0x1.bfcp-11},
+ {0x1.d8cp+9, 0x1.59p+9}, {0x1.e3p+12, 0x1.a2cp+12},
+ {0x1.81p-8, 0x1.5a8p-10}, {0x1.b48p+3, 0x1.6bcp+2},
+ {0x1.14p+8, 0x1.bbcp+7}, {0x1.72p-1, 0x1.cep-5},
+ {0x1.068p-3, 0x1.aa4p-4}, {0x1.7b8p+5, 0x1.e8cp+4},
+ {0x1.14p-9, 0x1.998p-11}, {0x1.878p+1, 0x1.83cp+0},
+ {0x1.068p+5, 0x1.41cp+4}, {0x1.0bp+14, 0x1.378p+12},
+ {0x1.2dp-2, 0x1.0a4p-3}, {0x1.ed8p+10, 0x1.bdp+7},
+ {0x1.f8p+5, 0x1.158p+4}, {0x1.44p-2, 0x1.62cp-3},
+ {0x1.a4p+11, 0x1.ce8p+9}, {0x1.fbp+5, 0x1.45p+2},
+ {0x1.1dp+4, 0x1.4c8p+2}, {0x1.2a8p-1, 0x1.f18p-3},
+ {0x1.2dp-11, 0x1.0a4p-12}, {0x1.c2p-8, 0x1.b6cp-9},
+ {0x1.adp+12, 0x1.41cp+12}, {0x1.9fp+4, 0x1.758p+2},
+ {0x1.42p-7, 0x1.59p-10}, {0x1.198p-8, 0x1.a64p-9},
+ {0x1.f2cp-2, 0x1.56p-3}, {0x1.e14p-2, 0x1.b9p-2},
+ {0x1.158p+7, 0x1.d5p+4}, {0x1.1f8p+7, 0x1.af4p+6},
+ {0x1.d88p-6, 0x1.8fp-9}, {0x1.38p-8, 0x1.f1p-11},
+ {0x1.3bp-8, 0x1.1cp-13}, {0x1.4a8p-4, 0x1.efcp-5},
+ {0x1.efp-2, 0x1.848p-3}, {0x1.cep-1, 0x1.75cp-1},
+ {0x1.098p+12, 0x1.ba8p+10}, {0x1.1fp+13, 0x1.c08p+11},
+ {0x1.d9p-12, 0x1.5cp-17}, {0x1.89cp+8, 0x1.77p+8},
+ {0x1.158p+0, 0x1.97p-3}, {0x1.d7p+1, 0x1.614p+1},
+ {0x1.b3p+5, 0x1.878p+3}, {0x1.028p-4, 0x1.af4p-5},
+ {0x1.93p-6, 0x1.b3cp-7}, {0x1.4d8p+13, 0x1.f44p+12},
+ {0x1.d4cp+5, 0x1.f4p+4}, {0x1.efp-11, 0x1.59p-14},
+ {0x1.b6cp+11, 0x1.11p+11}, {0x1.328p+11, 0x1.cbcp+10},
+ {0x1.bap+0, 0x1.be4p-1}, {0x1.318p+11, 0x1.ca4p+10},
+ {0x1.74cp-13, 0x1.63p-13}, {0x1.3bp+11, 0x1.1b8p+9},
+ {0x1.278p-5, 0x1.bb4p-6}, {0x1.1f8p+7, 0x1.74cp+6},
+ {0x1.848p-12, 0x1.8bp-15}, {0x1.28p-5, 0x1.59p-8},
+ {0x1.9ep+11, 0x1.87p+8}, {0x1.d38p-10, 0x1.e78p-12},
+ {0x1.abp-8, 0x1.f74p-9}, {0x1.5fp+12, 0x1.c2p+8},
+ {0x1.dbp-13, 0x1.998p-14}, {0x1.0e8p-5, 0x1.95cp-6},
+ {0x1.98p+6, 0x1.8a4p+5}, {0x1.998p+0, 0x1.fbp-3},
+ {0x1.188p+2, 0x1.d38p+0}, {0x1.efp-5, 0x1.814p-5},
+ {0x1.dbp-10, 0x1.464p-10}, {0x1.1a8p-10, 0x1.a7cp-11},
+ {0x1.068p-12, 0x1.41cp-13}, {0x1.ba8p-4, 0x1.448p-6},
+ {0x1.658p+10, 0x1.89cp+9}, {0x1.9c8p+2, 0x1.2e8p+0},
+ {0x1.d88p-12, 0x1.89cp-13}, {0x1.508p+4, 0x1.f8cp+3},
+ {0x1.c2p-8, 0x1.be4p-8}, {0x1.d4cp+9, 0x1.f4p+8},
+ {0x1.198p+5, 0x1.a64p+4}, {0x1.9ap-5, 0x1.44p-10},
+ {0x1.d5p+5, 0x1.5fcp+5}, {0x1.dbp-3, 0x1.ab8p-5},
+ {0x1.efp-14, 0x1.3ecp-14}, {0x1.05p+0, 0x1.c7p-3},
+ {0x1.c7p+15, 0x1.998p+13}, {0x1.658p+7, 0x1.7b8p+5},
+ {0x1.c98p+11, 0x1.7d4p+10}, {0x1.c2p+2, 0x1.a9p-1},
+ {0x1.fdp-9, 0x1.7dcp-9}, {0x1.3bp-6, 0x1.6f8p-8},
+ {0x1.d18p+3, 0x1.de8p+1}, {0x1.2cp+1, 0x1.36p-3},
+ {0x1.458p-4, 0x1.e84p-5}, {0x1.3f8p+8, 0x1.df4p+7},
+ {0x1.c2p+1, 0x1.26p-3}, {0x1.e5p+9, 0x1.6bcp+9},
+ {0x1.e7p+4, 0x1.6d4p+4}, {0x1.318p-12, 0x1.7f4p-13},
+ {0x1.cbp-2, 0x1.0bcp-3}, {0x1.068p-3, 0x1.41cp-4},
+ {0x1.55p+10, 0x1.e78p+8}, {0x1.c5cp+14, 0x1.8cp+14},
+ {0x1.a28p-1, 0x1.ddcp-2}, {0x1.848p-7, 0x1.43cp-8},
+ {0x1.9ecp-6, 0x1.8bp-6}, {0x1.3bp-2, 0x1.364p-3},
+ {0x1.c2p+13, 0x1.26p+9}, {0x1.1b8p-13, 0x1.a94p-14},
+ {0x1.dbp+11, 0x1.ab8p+9}, {0x1.f3p-13, 0x1.764p-13},
+ {0x1.32p+7, 0x1.ae8p+5}, {0x1.2dp-4, 0x1.0a4p-5},
+ {0x1.d8cp+7, 0x1.59p+7}, {0x1.05p+1, 0x1.308p-1},
+ {0x1.d64p+7, 0x1.11p+7}, {0x1.6f8p+7, 0x1.324p+6},
+ {0x1.d88p+8, 0x1.248p+6}, {0x1.008p+5, 0x1.ab8p+3},
+ {0x1.248p+4, 0x1.f2cp+3}, {0x1.e5p-11, 0x1.b48p-13},
+ {0x1.c2p+7, 0x1.964p+7}, {0x1.fa4p+0, 0x1.3bp+0},
+ {0x1.058p-12, 0x1.884p-13}, {0x1.f2p-13, 0x1.228p-14},
+ {0x1.c5cp-14, 0x1.8cp-14}, {0x1.cep+4, 0x1.75cp+4},
+ {0x1.71p+15, 0x1.81p+12}, {0x1.3bp+12, 0x1.644p+11},
+ {0x1.188p-8, 0x1.0cp-13}, {0x1.2fp+9, 0x1.618p+7},
+ {0x1.088p+11, 0x1.8ccp+10}, {0x1.c3p+9, 0x1.524p+9},
+ {0x1.f2cp+8, 0x1.44p+8}, {0x1.11p-7, 0x1.34cp-8},
+ {0x1.efp+9, 0x1.848p+8}, {0x1.fa4p+11, 0x1.0ep+11},
+ {0x1.3e8p-7, 0x1.ddcp-8}, {0x1.288p-13, 0x1.bccp-14},
+ {0x1.518p-8, 0x1.fa4p-9}, {0x1.c7p-3, 0x1.944p-4},
+ {0x1.338p+3, 0x1.c3p+0}, {0x1.1b8p+14, 0x1.d88p+12},
+ {0x1.d7p-5, 0x1.614p-5}, {0x1.9ep+6, 0x1.87p+3},
+ {0x1.e9p+13, 0x1.6ecp+13}, {0x1.9dp-3, 0x1.35cp-3},
+ {0x1.318p+13, 0x1.ca4p+12}, {0x1.11p+1, 0x1.cd4p+0},
+ {0x1.26p-10, 0x1.3bp-13}, {0x1.adp+14, 0x1.13p+11},
+ {0x1.9bcp-7, 0x1.76p-7}, {0x1.e6p-10, 0x1.cbp-13},
+ {0x1.dbp-2, 0x1.86p-6}, {0x1.f2cp+3, 0x1.dbp+3},
+ {0x1.2a8p+3, 0x1.bfcp+2}, {0x1.f2cp+14, 0x1.0ap+14},
+ {0x1.5cp+13, 0x1.b6cp+12}, {0x1.3d8p+3, 0x1.dc4p+2},
+ {0x1.0b8p-10, 0x1.914p-11}, {0x1.5cp+3, 0x1.6dcp+2},
+ {0x1.ba8p+0, 0x1.70cp-1}, {0x1.45p-2, 0x1.20cp-3},
+ {0x1.ab8p+5, 0x1.72p+1}, {0x1.dbp-13, 0x1.86p-17},
+ {0x1.aep-9, 0x1.6a4p-9}, {0x1.298p-13, 0x1.14p-18},
+ {0x1.0f8p-10, 0x1.974p-11}, {0x1.ddp+1, 0x1.65cp+1},
+ {0x1.d88p-6, 0x1.5a8p-8}, {0x1.08p-4, 0x1.04p-9},
+ {0x1.318p-1, 0x1.ca4p-2}, {0x1.45p-8, 0x1.248p-10},
+ {0x1.068p+14, 0x1.1ccp+13}, {0x1.468p+13, 0x1.e9cp+12},
+ {0x1.aa4p-9, 0x1.74p-9}, {0x1.02p+3, 0x1.3e4p+2},
+ {0x1.6bp-11, 0x1.a78p-13}, {0x1.158p-5, 0x1.d5p-8},
+ {0x1.0f8p-4, 0x1.974p-5}, {0x1.1ep-1, 0x1.728p-3},
+ {0x1.a9p+8, 0x1.7e8p+6}, {0x1.86p+13, 0x1.3cp+8},
+ {0x1.c2p+6, 0x1.8ecp+6}, {0x1.238p-4, 0x1.b54p-5},
+ {0x1.3bp-13, 0x1.734p-14}, {0x1.aep-3, 0x1.f2p-7},
+ {0x1.2cp+7, 0x1.36p+3}, {0x1.70cp+13, 0x1.65p+13},
+ {0x1.ed8p-9, 0x1.bdp-12}, {0x1.188p+15, 0x1.248p+13},
...
[truncated]
|
Added an increment that parallelizes the exhaustive test and reduces the time taken from 46 minutes to 7 minutes. Also applied the other suggested changes. |
I've got the following warnings when building the tests. Can you add explicit casts for them?
|
Added negative exhaustive tests, removed |
Changed to using |
Implement hypot for Float16 along with tests.
Implement hypot for Float16 along with tests.