diff --git a/libc/shared/math.h b/libc/shared/math.h index 26f69d6fa43ea..8dcfaf0352339 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -11,6 +11,7 @@ #include "libc_common.h" +#include "math/acos.h" #include "math/exp.h" #include "math/exp10.h" #include "math/exp10f.h" diff --git a/libc/shared/math/acos.h b/libc/shared/math/acos.h new file mode 100644 index 0000000000000..73c6b512e16f4 --- /dev/null +++ b/libc/shared/math/acos.h @@ -0,0 +1,23 @@ +//===-- Shared acos function ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ACOS_H +#define LLVM_LIBC_SHARED_MATH_ACOS_H + +#include "shared/libc_common.h" +#include "src/__support/math/acos.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::acos; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ACOS_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 77a47c65489dd..4a29c2975d523 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -1,3 +1,36 @@ +add_header_library( + acos + HDRS + acos.h + DEPENDS + .asin_utils + libc.src.__support.math.asin_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types + libc.src.__support.macros.properties.cpu_features +) + +add_header_library( + asin_utils + HDRS + asin_utils.h + DEPENDS + libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.optimization +) + add_header_library( exp_float_constants HDRS diff --git a/libc/src/__support/math/acos.h b/libc/src/__support/math/acos.h new file mode 100644 index 0000000000000..a7287f11aa302 --- /dev/null +++ b/libc/src/__support/math/acos.h @@ -0,0 +1,285 @@ +//===-- Implementation header for acos --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H + +#include "asin_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +using DoubleDouble = fputil::DoubleDouble; +using Float128 = fputil::DyadicFloat<128>; + +static constexpr double acos(double x) { + using FPBits = fputil::FPBits; + + FPBits xbits(x); + int x_exp = xbits.get_biased_exponent(); + + // |x| < 0.5. + if (x_exp < FPBits::EXP_BIAS - 1) { + // |x| < 2^-55. + if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 55)) { + // When |x| < 2^-55, acos(x) = pi/2 +#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) + return PI_OVER_TWO.hi; +#else + // Force the evaluation and prevent constant propagation so that it + // is rounded correctly for FE_UPWARD rounding mode. + return (xbits.abs().get_val() + 0x1.0p-160) + PI_OVER_TWO.hi; +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // acos(x) = pi/2 - asin(x) + // = pi/2 - x * P(x^2) + double p = asin_eval(x * x); + return PI_OVER_TWO.hi + fputil::multiply_add(-x, p, PI_OVER_TWO.lo); +#else + unsigned idx = 0; + DoubleDouble x_sq = fputil::exact_mult(x, x); + double err = xbits.abs().get_val() * 0x1.0p-51; + // Polynomial approximation: + // p ~ asin(x)/x + DoubleDouble p = asin_eval(x_sq, idx, err); + // asin(x) ~ x * p + DoubleDouble r0 = fputil::exact_mult(x, p.hi); + // acos(x) = pi/2 - asin(x) + // ~ pi/2 - x * p + // = pi/2 - x * (p.hi + p.lo) + double r_hi = fputil::multiply_add(-x, p.hi, PI_OVER_TWO.hi); + // Use Dekker's 2SUM algorithm to compute the lower part. + double r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo; + r_lo = fputil::multiply_add(-x, p.lo, r_lo + PI_OVER_TWO.lo); + + // Ziv's accuracy test. + + double r_upper = r_hi + (r_lo + err); + double r_lower = r_hi + (r_lo - err); + + if (LIBC_LIKELY(r_upper == r_lower)) + return r_upper; + + // Ziv's accuracy test failed, perform 128-bit calculation. + + // Recalculate mod 1/64. + idx = static_cast(fputil::nearest_integer(x_sq.hi * 0x1.0p6)); + + // Get x^2 - idx/64 exactly. When FMA is available, double-double + // multiplication will be correct for all rounding modes. Otherwise we use + // Float128 directly. + Float128 x_f128(x); + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + // u = x^2 - idx/64 + Float128 u_hi( + fputil::multiply_add(static_cast(idx), -0x1.0p-6, x_sq.hi)); + Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo)); +#else + Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128); + Float128 u = fputil::quick_add( + x_sq_f128, Float128(static_cast(idx) * (-0x1.0p-6))); +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + Float128 p_f128 = asin_eval(u, idx); + // Flip the sign of x_f128 to perform subtraction. + x_f128.sign = x_f128.sign.negate(); + Float128 r = + fputil::quick_add(PI_OVER_TWO_F128, fputil::quick_mul(x_f128, p_f128)); + + return static_cast(r); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + // |x| >= 0.5 + + double x_abs = xbits.abs().get_val(); + + // Maintaining the sign: + constexpr double SIGN[2] = {1.0, -1.0}; + double x_sign = SIGN[xbits.is_neg()]; + // |x| >= 1 + if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) { + // x = +-1, asin(x) = +- pi/2 + if (x_abs == 1.0) { + // x = 1, acos(x) = 0, + // x = -1, acos(x) = pi + return x == 1.0 ? 0.0 : fputil::multiply_add(-x_sign, PI.hi, PI.lo); + } + // |x| > 1, return NaN. + if (xbits.is_quiet_nan()) + return x; + + // Set domain error for non-NaN input. + if (!xbits.is_nan()) + fputil::set_errno_if_required(EDOM); + + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // When |x| >= 0.5, we perform range reduction as follow: + // + // When 0.5 <= x < 1, let: + // y = acos(x) + // We will use the double angle formula: + // cos(2y) = 1 - 2 sin^2(y) + // and the complement angle identity: + // x = cos(y) = 1 - 2 sin^2 (y/2) + // So: + // sin(y/2) = sqrt( (1 - x)/2 ) + // And hence: + // y/2 = asin( sqrt( (1 - x)/2 ) ) + // Equivalently: + // acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) ) + // Let u = (1 - x)/2, then: + // acos(x) = 2 * asin( sqrt(u) ) + // Moreover, since 0.5 <= x < 1: + // 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5, + // And hence we can reuse the same polynomial approximation of asin(x) when + // |x| <= 0.5: + // acos(x) ~ 2 * sqrt(u) * P(u). + // + // When -1 < x <= -0.5, we reduce to the previous case using the formula: + // acos(x) = pi - acos(-x) + // = pi - 2 * asin ( sqrt( (1 + x)/2 ) ) + // ~ pi - 2 * sqrt(u) * P(u), + // where u = (1 - |x|)/2. + + // u = (1 - |x|)/2 + double u = fputil::multiply_add(x_abs, -0.5, 0.5); + // v_hi + v_lo ~ sqrt(u). + // Let: + // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi) + // Then: + // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) + // ~ v_hi + h / (2 * v_hi) + // So we can use: + // v_lo = h / (2 * v_hi). + double v_hi = fputil::sqrt(u); + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr DoubleDouble CONST_TERM[2] = {{0.0, 0.0}, PI}; + DoubleDouble const_term = CONST_TERM[xbits.is_neg()]; + + double p = asin_eval(u); + double scale = x_sign * 2.0 * v_hi; + double r = const_term.hi + fputil::multiply_add(scale, p, const_term.lo); + return r; +#else + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double h = fputil::multiply_add(v_hi, -v_hi, u); +#else + DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi); + double h = (u - v_hi_sq.hi) - v_hi_sq.lo; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + // Scale v_lo and v_hi by 2 from the formula: + // vh = v_hi * 2 + // vl = 2*v_lo = h / v_hi. + double vh = v_hi * 2.0; + double vl = h / v_hi; + + // Polynomial approximation: + // p ~ asin(sqrt(u))/sqrt(u) + unsigned idx = 0; + double err = vh * 0x1.0p-51; + + DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err); + + // Perform computations in double-double arithmetic: + // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p) + DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p); + + double r_hi = 0, r_lo = 0; + if (xbits.is_pos()) { + r_hi = r0.hi; + r_lo = r0.lo; + } else { + DoubleDouble r = fputil::exact_add(PI.hi, -r0.hi); + r_hi = r.hi; + r_lo = (PI.lo - r0.lo) + r.lo; + } + + // Ziv's accuracy test. + + double r_upper = r_hi + (r_lo + err); + double r_lower = r_hi + (r_lo - err); + + if (LIBC_LIKELY(r_upper == r_lower)) + return r_upper; + + // Ziv's accuracy test failed, we redo the computations in Float128. + // Recalculate mod 1/64. + idx = static_cast(fputil::nearest_integer(u * 0x1.0p6)); + + // After the first step of Newton-Raphson approximating v = sqrt(u), we have + // that: + // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) + // v_lo = h / (2 * v_hi) + // With error: + // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) ) + // = -h^2 / (2*v * (sqrt(u) + v)^2). + // Since: + // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u, + // we can add another correction term to (v_hi + v_lo) that is: + // v_ll = -h^2 / (2*v_hi * 4u) + // = -v_lo * (h / 4u) + // = -vl * (h / 8u), + // making the errors: + // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3) + // well beyond 128-bit precision needed. + + // Get the rounding error of vl = 2 * v_lo ~ h / vh + // Get full product of vh * vl +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi; +#else + DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl); + double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + // vll = 2*v_ll = -vl * (h / (4u)). + double t = h * (-0.25) / u; + double vll = fputil::multiply_add(vl, t, vl_lo); + // m_v = -(v_hi + v_lo + v_ll). + Float128 m_v = fputil::quick_add( + Float128(vh), fputil::quick_add(Float128(vl), Float128(vll))); + m_v.sign = xbits.sign(); + + // Perform computations in Float128: + // acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1, + // = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5. + Float128 y_f128(fputil::multiply_add(static_cast(idx), -0x1.0p-6, u)); + + Float128 p_f128 = asin_eval(y_f128, idx); + Float128 r_f128 = fputil::quick_mul(m_v, p_f128); + + if (xbits.is_neg()) + r_f128 = fputil::quick_add(PI_F128, r_f128); + + return static_cast(r_f128); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H diff --git a/libc/src/math/generic/asin_utils.h b/libc/src/__support/math/asin_utils.h similarity index 96% rename from libc/src/math/generic/asin_utils.h rename to libc/src/__support/math/asin_utils.h index 44913d573de2c..3146444afc51f 100644 --- a/libc/src/math/generic/asin_utils.h +++ b/libc/src/__support/math/asin_utils.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H -#define LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_UTILS_H #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/double_double.h" @@ -16,7 +16,6 @@ #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/integer_literals.h" #include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE_DECL { @@ -25,10 +24,10 @@ namespace { using DoubleDouble = fputil::DoubleDouble; using Float128 = fputil::DyadicFloat<128>; -constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p1}; +static constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p1}; -constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, - 0x1.921fb54442d18p0}; +static constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, + 0x1.921fb54442d18p0}; #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS @@ -39,14 +38,14 @@ constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, // > dirtyinfnorm(asin(x)/x - P, [0, 0.5]); // 0x1.1a71ef0a0f26a9fb7ed7e41dee788b13d1770db3dp-52 -constexpr double ASIN_COEFFS[12] = { +static constexpr double ASIN_COEFFS[12] = { 0x1.0000000000000p0, 0x1.5555555556dcfp-3, 0x1.3333333082e11p-4, 0x1.6db6dd14099edp-5, 0x1.f1c69b35bf81fp-6, 0x1.6e97194225a67p-6, 0x1.1babddb82ce12p-6, 0x1.d55bd078600d6p-7, 0x1.33328959e63d6p-7, 0x1.2b5993bda1d9bp-6, -0x1.806aff270bf25p-7, 0x1.02614e5ed3936p-5, }; -LIBC_INLINE double asin_eval(double u) { +LIBC_INLINE static constexpr double asin_eval(double u) { double u2 = u * u; double c0 = fputil::multiply_add(u, ASIN_COEFFS[1], ASIN_COEFFS[0]); double c1 = fputil::multiply_add(u, ASIN_COEFFS[3], ASIN_COEFFS[2]); @@ -124,7 +123,7 @@ LIBC_INLINE double asin_eval(double u) { // > dirtyinfnorm(asin(x)/x - P, [-1/64, 1/64]); // 0x1.999075402cafp-83 -constexpr double ASIN_COEFFS[9][12] = { +static constexpr double ASIN_COEFFS[9][12] = { {1.0, 0.0, 0x1.5555555555555p-3, 0x1.5555555555555p-57, 0x1.3333333333333p-4, 0x1.6db6db6db6db7p-5, 0x1.f1c71c71c71c7p-6, 0x1.6e8ba2e8ba2e9p-6, 0x1.1c4ec4ec4ec4fp-6, 0x1.c99999999999ap-7, @@ -164,8 +163,8 @@ constexpr double ASIN_COEFFS[9][12] = { }; // We calculate the lower part of the approximation P(u). -LIBC_INLINE DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx, - double &err) { +LIBC_INLINE static DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx, + double &err) { using fputil::multiply_add; // k = round(u * 32). double k = fputil::nearest_integer(u.hi * 0x1.0p5); @@ -239,7 +238,7 @@ LIBC_INLINE DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx, // + (676039 x^24)/104857600 + (1300075 x^26)/226492416 + // + (5014575 x^28)/973078528 + (9694845 x^30)/2080374784. -constexpr Float128 ASIN_COEFFS_F128[17][16] = { +static constexpr Float128 ASIN_COEFFS_F128[17][16] = { { {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, @@ -548,13 +547,14 @@ constexpr Float128 ASIN_COEFFS_F128[17][16] = { }, }; -constexpr Float128 PI_OVER_TWO_F128 = { +static constexpr Float128 PI_OVER_TWO_F128 = { Sign::POS, -127, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -constexpr Float128 PI_F128 = {Sign::POS, -126, - 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; +static constexpr Float128 PI_F128 = { + Sign::POS, -126, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -LIBC_INLINE Float128 asin_eval(const Float128 &u, unsigned idx) { +LIBC_INLINE static constexpr Float128 asin_eval(const Float128 &u, + unsigned idx) { return fputil::polyeval(u, ASIN_COEFFS_F128[idx][0], ASIN_COEFFS_F128[idx][1], ASIN_COEFFS_F128[idx][2], ASIN_COEFFS_F128[idx][3], ASIN_COEFFS_F128[idx][4], ASIN_COEFFS_F128[idx][5], @@ -571,4 +571,4 @@ LIBC_INLINE Float128 asin_eval(const Float128 &u, unsigned idx) { } // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_UTILS_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index fb253a4502700..7e6a32b7cdf16 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4016,20 +4016,6 @@ add_entrypoint_object( libc.src.__support.macros.properties.types ) -add_header_library( - asin_utils - HDRS - atan_utils.h - DEPENDS - libc.src.__support.integer_literals - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization -) - add_entrypoint_object( asin SRCS @@ -4037,7 +4023,7 @@ add_entrypoint_object( HDRS ../asin.h DEPENDS - .asin_utils + libc.src.__support.math.asin_utils libc.src.__support.FPUtil.double_double libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl @@ -4092,17 +4078,7 @@ add_entrypoint_object( HDRS ../acos.h DEPENDS - .asin_utils - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types - libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.acos ) add_entrypoint_object( diff --git a/libc/src/math/generic/acos.cpp b/libc/src/math/generic/acos.cpp index c14721faef3ce..3a5964290cdd3 100644 --- a/libc/src/math/generic/acos.cpp +++ b/libc/src/math/generic/acos.cpp @@ -7,272 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/acos.h" -#include "asin_utils.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/acos.h" namespace LIBC_NAMESPACE_DECL { -using DoubleDouble = fputil::DoubleDouble; -using Float128 = fputil::DyadicFloat<128>; - -LLVM_LIBC_FUNCTION(double, acos, (double x)) { - using FPBits = fputil::FPBits; - - FPBits xbits(x); - int x_exp = xbits.get_biased_exponent(); - - // |x| < 0.5. - if (x_exp < FPBits::EXP_BIAS - 1) { - // |x| < 2^-55. - if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 55)) { - // When |x| < 2^-55, acos(x) = pi/2 -#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) - return PI_OVER_TWO.hi; -#else - // Force the evaluation and prevent constant propagation so that it - // is rounded correctly for FE_UPWARD rounding mode. - return (xbits.abs().get_val() + 0x1.0p-160) + PI_OVER_TWO.hi; -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // acos(x) = pi/2 - asin(x) - // = pi/2 - x * P(x^2) - double p = asin_eval(x * x); - return PI_OVER_TWO.hi + fputil::multiply_add(-x, p, PI_OVER_TWO.lo); -#else - unsigned idx; - DoubleDouble x_sq = fputil::exact_mult(x, x); - double err = xbits.abs().get_val() * 0x1.0p-51; - // Polynomial approximation: - // p ~ asin(x)/x - DoubleDouble p = asin_eval(x_sq, idx, err); - // asin(x) ~ x * p - DoubleDouble r0 = fputil::exact_mult(x, p.hi); - // acos(x) = pi/2 - asin(x) - // ~ pi/2 - x * p - // = pi/2 - x * (p.hi + p.lo) - double r_hi = fputil::multiply_add(-x, p.hi, PI_OVER_TWO.hi); - // Use Dekker's 2SUM algorithm to compute the lower part. - double r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo; - r_lo = fputil::multiply_add(-x, p.lo, r_lo + PI_OVER_TWO.lo); - - // Ziv's accuracy test. - - double r_upper = r_hi + (r_lo + err); - double r_lower = r_hi + (r_lo - err); - - if (LIBC_LIKELY(r_upper == r_lower)) - return r_upper; - - // Ziv's accuracy test failed, perform 128-bit calculation. - - // Recalculate mod 1/64. - idx = static_cast(fputil::nearest_integer(x_sq.hi * 0x1.0p6)); - - // Get x^2 - idx/64 exactly. When FMA is available, double-double - // multiplication will be correct for all rounding modes. Otherwise we use - // Float128 directly. - Float128 x_f128(x); - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - // u = x^2 - idx/64 - Float128 u_hi( - fputil::multiply_add(static_cast(idx), -0x1.0p-6, x_sq.hi)); - Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo)); -#else - Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128); - Float128 u = fputil::quick_add( - x_sq_f128, Float128(static_cast(idx) * (-0x1.0p-6))); -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - Float128 p_f128 = asin_eval(u, idx); - // Flip the sign of x_f128 to perform subtraction. - x_f128.sign = x_f128.sign.negate(); - Float128 r = - fputil::quick_add(PI_OVER_TWO_F128, fputil::quick_mul(x_f128, p_f128)); - - return static_cast(r); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - // |x| >= 0.5 - - double x_abs = xbits.abs().get_val(); - - // Maintaining the sign: - constexpr double SIGN[2] = {1.0, -1.0}; - double x_sign = SIGN[xbits.is_neg()]; - // |x| >= 1 - if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) { - // x = +-1, asin(x) = +- pi/2 - if (x_abs == 1.0) { - // x = 1, acos(x) = 0, - // x = -1, acos(x) = pi - return x == 1.0 ? 0.0 : fputil::multiply_add(-x_sign, PI.hi, PI.lo); - } - // |x| > 1, return NaN. - if (xbits.is_quiet_nan()) - return x; - - // Set domain error for non-NaN input. - if (!xbits.is_nan()) - fputil::set_errno_if_required(EDOM); - - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - // When |x| >= 0.5, we perform range reduction as follow: - // - // When 0.5 <= x < 1, let: - // y = acos(x) - // We will use the double angle formula: - // cos(2y) = 1 - 2 sin^2(y) - // and the complement angle identity: - // x = cos(y) = 1 - 2 sin^2 (y/2) - // So: - // sin(y/2) = sqrt( (1 - x)/2 ) - // And hence: - // y/2 = asin( sqrt( (1 - x)/2 ) ) - // Equivalently: - // acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) ) - // Let u = (1 - x)/2, then: - // acos(x) = 2 * asin( sqrt(u) ) - // Moreover, since 0.5 <= x < 1: - // 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5, - // And hence we can reuse the same polynomial approximation of asin(x) when - // |x| <= 0.5: - // acos(x) ~ 2 * sqrt(u) * P(u). - // - // When -1 < x <= -0.5, we reduce to the previous case using the formula: - // acos(x) = pi - acos(-x) - // = pi - 2 * asin ( sqrt( (1 + x)/2 ) ) - // ~ pi - 2 * sqrt(u) * P(u), - // where u = (1 - |x|)/2. - - // u = (1 - |x|)/2 - double u = fputil::multiply_add(x_abs, -0.5, 0.5); - // v_hi + v_lo ~ sqrt(u). - // Let: - // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi) - // Then: - // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) - // ~ v_hi + h / (2 * v_hi) - // So we can use: - // v_lo = h / (2 * v_hi). - double v_hi = fputil::sqrt(u); - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - constexpr DoubleDouble CONST_TERM[2] = {{0.0, 0.0}, PI}; - DoubleDouble const_term = CONST_TERM[xbits.is_neg()]; - - double p = asin_eval(u); - double scale = x_sign * 2.0 * v_hi; - double r = const_term.hi + fputil::multiply_add(scale, p, const_term.lo); - return r; -#else - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - double h = fputil::multiply_add(v_hi, -v_hi, u); -#else - DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi); - double h = (u - v_hi_sq.hi) - v_hi_sq.lo; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - // Scale v_lo and v_hi by 2 from the formula: - // vh = v_hi * 2 - // vl = 2*v_lo = h / v_hi. - double vh = v_hi * 2.0; - double vl = h / v_hi; - - // Polynomial approximation: - // p ~ asin(sqrt(u))/sqrt(u) - unsigned idx; - double err = vh * 0x1.0p-51; - - DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err); - - // Perform computations in double-double arithmetic: - // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p) - DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p); - - double r_hi, r_lo; - if (xbits.is_pos()) { - r_hi = r0.hi; - r_lo = r0.lo; - } else { - DoubleDouble r = fputil::exact_add(PI.hi, -r0.hi); - r_hi = r.hi; - r_lo = (PI.lo - r0.lo) + r.lo; - } - - // Ziv's accuracy test. - - double r_upper = r_hi + (r_lo + err); - double r_lower = r_hi + (r_lo - err); - - if (LIBC_LIKELY(r_upper == r_lower)) - return r_upper; - - // Ziv's accuracy test failed, we redo the computations in Float128. - // Recalculate mod 1/64. - idx = static_cast(fputil::nearest_integer(u * 0x1.0p6)); - - // After the first step of Newton-Raphson approximating v = sqrt(u), we have - // that: - // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) - // v_lo = h / (2 * v_hi) - // With error: - // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) ) - // = -h^2 / (2*v * (sqrt(u) + v)^2). - // Since: - // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u, - // we can add another correction term to (v_hi + v_lo) that is: - // v_ll = -h^2 / (2*v_hi * 4u) - // = -v_lo * (h / 4u) - // = -vl * (h / 8u), - // making the errors: - // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3) - // well beyond 128-bit precision needed. - - // Get the rounding error of vl = 2 * v_lo ~ h / vh - // Get full product of vh * vl -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi; -#else - DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl); - double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - // vll = 2*v_ll = -vl * (h / (4u)). - double t = h * (-0.25) / u; - double vll = fputil::multiply_add(vl, t, vl_lo); - // m_v = -(v_hi + v_lo + v_ll). - Float128 m_v = fputil::quick_add( - Float128(vh), fputil::quick_add(Float128(vl), Float128(vll))); - m_v.sign = xbits.sign(); - - // Perform computations in Float128: - // acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1, - // = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5. - Float128 y_f128(fputil::multiply_add(static_cast(idx), -0x1.0p-6, u)); - - Float128 p_f128 = asin_eval(y_f128, idx); - Float128 r_f128 = fputil::quick_mul(m_v, p_f128); - - if (xbits.is_neg()) - r_f128 = fputil::quick_add(PI_F128, r_f128); - - return static_cast(r_f128); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS -} +LLVM_LIBC_FUNCTION(double, acos, (double x)) { return math::acos(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp index ad77683d1f880..c033597334345 100644 --- a/libc/src/math/generic/asin.cpp +++ b/libc/src/math/generic/asin.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/asin.h" -#include "asin_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -18,6 +17,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/asin_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index f0b45a99aae40..1d9989debdcdb 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2077,6 +2077,38 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_acos", + hdrs = ["src/__support/math/acos.h"], + deps = [ + ":__support_math_asin_utils", + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", + ":__support_fputil_fenv_impl", + ":__support_fputil_fp_bits", + ":__support_fputil_multiply_add", + ":__support_fputil_polyeval", + ":__support_fputil_sqrt", + ":__support_macros_optimization", + ":__support_macros_properties_types", + ":__support_macros_properties_cpu_features", + ], +) + +libc_support_library( + name = "__support_math_asin_utils", + hdrs = ["src/__support/math/asin_utils.h"], + deps = [ + ":__support_integer_literals", + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_macros_optimization", + ], +) + libc_support_library( name = "__support_math_exp_float_constants", hdrs = ["src/__support/math/exp_float_constants.h"], @@ -2554,6 +2586,13 @@ libc_function( ################################ math targets ################################## +libc_math_function( + name = "acos", + additional_deps = [ + ":__support_math_acos", + ], +) + libc_math_function( name = "acosf", additional_deps = [