diff --git a/libc/shared/math.h b/libc/shared/math.h index 617e46602de8c..baec05b08a25d 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -13,6 +13,7 @@ #include "math/acos.h" #include "math/acosf.h" +#include "math/acosf16.h" #include "math/exp.h" #include "math/exp10.h" #include "math/exp10f.h" diff --git a/libc/shared/math/acosf16.h b/libc/shared/math/acosf16.h new file mode 100644 index 0000000000000..aaf6ed9922556 --- /dev/null +++ b/libc/shared/math/acosf16.h @@ -0,0 +1,29 @@ +//===-- Shared acosf16 function ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ACOSF16_H +#define LLVM_LIBC_SHARED_MATH_ACOSF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "shared/libc_common.h" +#include "src/__support/math/acosf16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::acosf16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_ACOSF16_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index fbe7e2c3125ce..fc2c8a65340cc 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -31,6 +31,22 @@ add_header_library( libc.src.__support.macros.optimization ) +add_header_library( + acosf16 + HDRS + acosf16.h + DEPENDS + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types +) + add_header_library( asin_utils HDRS diff --git a/libc/src/__support/math/acos.h b/libc/src/__support/math/acos.h index 7c9fc7671b248..a52ead7fc1b3b 100644 --- a/libc/src/__support/math/acos.h +++ b/libc/src/__support/math/acos.h @@ -26,7 +26,6 @@ namespace math { static constexpr double acos(double x) { using DoubleDouble = fputil::DoubleDouble; - using Float128 = fputil::DyadicFloat<128>; using namespace asin_internal; using FPBits = fputil::FPBits; diff --git a/libc/src/__support/math/acosf16.h b/libc/src/__support/math/acosf16.h new file mode 100644 index 0000000000000..58d3761b95245 --- /dev/null +++ b/libc/src/__support/math/acosf16.h @@ -0,0 +1,164 @@ +//===-- Implementation header for acosf16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +static constexpr float16 acosf16(float16 x) { + + // Generated by Sollya using the following command: + // > round(pi/2, SG, RN); + // > round(pi, SG, RN); + constexpr float PI_OVER_2 = 0x1.921fb6p0f; + constexpr float PI = 0x1.921fb6p1f; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr size_t N_EXCEPTS = 2; + + constexpr fputil::ExceptValues ACOSF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + {0xacaf, 0x3e93, 1, 0, 0}, + {0xb874, 0x4052, 1, 0, 1}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using FPBits = fputil::FPBits; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + uint16_t x_sign = x_u >> 15; + + // |x| > 0x1p0, |x| > 1, or x is NaN. + if (LIBC_UNLIKELY(x_abs > 0x3c00)) { + // acosf16(NaN) = NaN + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // 1 < |x| <= +/-inf + fputil::raise_except_if_required(FE_INVALID); + fputil::set_errno_if_required(EDOM); + + return FPBits::quiet_nan().get_val(); + } + + float xf = x; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Handle exceptional values + if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // |x| == 0x1p0, x is 1 or -1 + // if x is (-)1, return pi, else + // if x is (+)1, return 0 + if (LIBC_UNLIKELY(x_abs == 0x3c00)) + return fputil::cast(x_sign ? PI : 0.0f); + + float xsq = xf * xf; + + // |x| <= 0x1p-1, |x| <= 0.5 + if (x_abs <= 0x3800) { + // if x is 0, return pi/2 + if (LIBC_UNLIKELY(x_abs == 0)) + return fputil::cast(PI_OVER_2); + + // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x) + // Degree-6 minimax polynomial of asin(x) generated by Sollya with: + // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); + float interm = + fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f, + 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); + return fputil::cast(fputil::multiply_add(-xf, interm, PI_OVER_2)); + } + + // When |x| > 0.5, assume that 0.5 < |x| <= 1 + // + // Step-by-step range-reduction proof: + // 1: Let y = asin(x), such that, x = sin(y) + // 2: From complimentary angle identity: + // x = sin(y) = cos(pi/2 - y) + // 3: Let z = pi/2 - y, such that x = cos(z) + // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A): + // z = 2A, z/2 = A + // cos(z) = 1 - 2 * sin^2(z/2) + // 5: Make sin(z/2) subject of the formula: + // sin(z/2) = sqrt((1 - cos(z))/2) + // 6: Recall [3]; x = cos(z). Therefore: + // sin(z/2) = sqrt((1 - x)/2) + // 7: Let u = (1 - x)/2 + // 8: Therefore: + // asin(sqrt(u)) = z/2 + // 2 * asin(sqrt(u)) = z + // 9: Recall [3]; z = pi/2 - y. Therefore: + // y = pi/2 - z + // y = pi/2 - 2 * asin(sqrt(u)) + // 10: Recall [1], y = asin(x). Therefore: + // asin(x) = pi/2 - 2 * asin(sqrt(u)) + // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x) + // Therefore: + // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u))) + // acos(x) = 2 * asin(sqrt(u)) + // + // THE RANGE REDUCTION, HOW? + // 12: Recall [7], u = (1 - x)/2 + // 13: Since 0.5 < x <= 1, therefore: + // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5 + // + // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for + // Step [11] as `sqrt(u)` is in range. + // When -1 < x <= -0.5, the identity: + // acos(x) = pi - acos(-x) + // allows us to compute for the negative x value (lhs) + // with a positive x value instead (rhs). + + float xf_abs = (xf < 0 ? -xf : xf); + float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f); + float sqrt_u = fputil::sqrt(u); + + // Degree-6 minimax polynomial of asin(x) generated by Sollya with: + // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); + float asin_sqrt_u = + sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f, + 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); + + return fputil::cast( + x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index c90665d5aa2a5..9ea024f48fa7e 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4040,17 +4040,8 @@ add_entrypoint_object( HDRS ../acosf16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.acosf16 + libc.src.errno.errno ) add_entrypoint_object( diff --git a/libc/src/math/generic/acosf16.cpp b/libc/src/math/generic/acosf16.cpp index 202a950fbb5dd..0bf85f84c842c 100644 --- a/libc/src/math/generic/acosf16.cpp +++ b/libc/src/math/generic/acosf16.cpp @@ -8,144 +8,10 @@ //===----------------------------------------------------------------------===// #include "src/math/acosf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/acosf16.h" namespace LIBC_NAMESPACE_DECL { -// Generated by Sollya using the following command: -// > round(pi/2, SG, RN); -// > round(pi, SG, RN); -static constexpr float PI_OVER_2 = 0x1.921fb6p0f; -static constexpr float PI = 0x1.921fb6p1f; +LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) { return math::acosf16(x); } -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr size_t N_EXCEPTS = 2; - -static constexpr fputil::ExceptValues ACOSF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - {0xacaf, 0x3e93, 1, 0, 0}, - {0xb874, 0x4052, 1, 0, 1}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) { - using FPBits = fputil::FPBits; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - uint16_t x_sign = x_u >> 15; - - // |x| > 0x1p0, |x| > 1, or x is NaN. - if (LIBC_UNLIKELY(x_abs > 0x3c00)) { - // acosf16(NaN) = NaN - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // 1 < |x| <= +/-inf - fputil::raise_except_if_required(FE_INVALID); - fputil::set_errno_if_required(EDOM); - - return FPBits::quiet_nan().get_val(); - } - - float xf = x; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Handle exceptional values - if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // |x| == 0x1p0, x is 1 or -1 - // if x is (-)1, return pi, else - // if x is (+)1, return 0 - if (LIBC_UNLIKELY(x_abs == 0x3c00)) - return fputil::cast(x_sign ? PI : 0.0f); - - float xsq = xf * xf; - - // |x| <= 0x1p-1, |x| <= 0.5 - if (x_abs <= 0x3800) { - // if x is 0, return pi/2 - if (LIBC_UNLIKELY(x_abs == 0)) - return fputil::cast(PI_OVER_2); - - // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x) - // Degree-6 minimax polynomial of asin(x) generated by Sollya with: - // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); - float interm = - fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f, - 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); - return fputil::cast(fputil::multiply_add(-xf, interm, PI_OVER_2)); - } - - // When |x| > 0.5, assume that 0.5 < |x| <= 1 - // - // Step-by-step range-reduction proof: - // 1: Let y = asin(x), such that, x = sin(y) - // 2: From complimentary angle identity: - // x = sin(y) = cos(pi/2 - y) - // 3: Let z = pi/2 - y, such that x = cos(z) - // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A): - // z = 2A, z/2 = A - // cos(z) = 1 - 2 * sin^2(z/2) - // 5: Make sin(z/2) subject of the formula: - // sin(z/2) = sqrt((1 - cos(z))/2) - // 6: Recall [3]; x = cos(z). Therefore: - // sin(z/2) = sqrt((1 - x)/2) - // 7: Let u = (1 - x)/2 - // 8: Therefore: - // asin(sqrt(u)) = z/2 - // 2 * asin(sqrt(u)) = z - // 9: Recall [3]; z = pi/2 - y. Therefore: - // y = pi/2 - z - // y = pi/2 - 2 * asin(sqrt(u)) - // 10: Recall [1], y = asin(x). Therefore: - // asin(x) = pi/2 - 2 * asin(sqrt(u)) - // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x) - // Therefore: - // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u))) - // acos(x) = 2 * asin(sqrt(u)) - // - // THE RANGE REDUCTION, HOW? - // 12: Recall [7], u = (1 - x)/2 - // 13: Since 0.5 < x <= 1, therefore: - // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5 - // - // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for - // Step [11] as `sqrt(u)` is in range. - // When -1 < x <= -0.5, the identity: - // acos(x) = pi - acos(-x) - // allows us to compute for the negative x value (lhs) - // with a positive x value instead (rhs). - - float xf_abs = (xf < 0 ? -xf : xf); - float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f); - float sqrt_u = fputil::sqrt(u); - - // Degree-6 minimax polynomial of asin(x) generated by Sollya with: - // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); - float asin_sqrt_u = - sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f, - 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); - - return fputil::cast( - x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u); -} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index 07a4e735362b2..5fa18321bdcdf 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -21,4 +21,5 @@ add_fp_unittest( libc.src.__support.math.ldexpf libc.src.__support.math.ldexpf128 libc.src.__support.math.ldexpf16 + libc.src.__support.math.acosf16 ) diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index 40fea3bb9b952..0152557c08c96 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -25,6 +25,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) { EXPECT_FP_EQ_ALL_ROUNDING(0.75f16, LIBC_NAMESPACE::shared::frexpf16(24.0f, &exponent)); EXPECT_EQ(exponent, 5); + + EXPECT_FP_EQ(0x1.921fb6p+0f16, LIBC_NAMESPACE::shared::acosf16(0.0f16)); } #endif diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 337423cfb96cb..4579000fd174f 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2093,6 +2093,20 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_acosf16", + hdrs = ["src/__support/math/acosf16.h"], + deps = [ + ":__support_fputil_cast", + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_sqrt", + ":__support_macros_optimization", + ], +) + libc_support_library( name = "__support_math_asin_utils", hdrs = ["src/__support/math/asin_utils.h"], @@ -2611,14 +2625,8 @@ libc_math_function( libc_math_function( name = "acosf16", additional_deps = [ - ":__support_fputil_cast", - ":__support_fputil_fma", - ":__support_fputil_multiply_add", - ":__support_fputil_nearest_integer", - ":__support_fputil_polyeval", - ":__support_fputil_sqrt", - ":__support_macros_optimization", - ":__support_math_inv_trigf_utils", + ":__support_math_acosf16", + ":errno", ], )