Skip to content

Commit 9ad7ede

Browse files
authored
[libc][math] Refactor acosf16 implementation to header-only in src/__support/math folder. (#148412)
Part of #147386 in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450
1 parent 8940ab5 commit 9ad7ede

File tree

10 files changed

+233
-156
lines changed

10 files changed

+233
-156
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "math/acos.h"
1515
#include "math/acosf.h"
16+
#include "math/acosf16.h"
1617
#include "math/exp.h"
1718
#include "math/exp10.h"
1819
#include "math/exp10f.h"

libc/shared/math/acosf16.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Shared acosf16 function ---------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_ACOSF16_H
10+
#define LLVM_LIBC_SHARED_MATH_ACOSF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "shared/libc_common.h"
17+
#include "src/__support/math/acosf16.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace shared {
21+
22+
using math::acosf16;
23+
24+
} // namespace shared
25+
} // namespace LIBC_NAMESPACE_DECL
26+
27+
#endif // LIBC_TYPES_HAS_FLOAT16
28+
29+
#endif // LLVM_LIBC_SHARED_MATH_ACOSF16_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@ add_header_library(
3131
libc.src.__support.macros.optimization
3232
)
3333

34+
add_header_library(
35+
acosf16
36+
HDRS
37+
acosf16.h
38+
DEPENDS
39+
libc.src.__support.FPUtil.cast
40+
libc.src.__support.FPUtil.except_value_utils
41+
libc.src.__support.FPUtil.fenv_impl
42+
libc.src.__support.FPUtil.fp_bits
43+
libc.src.__support.FPUtil.multiply_add
44+
libc.src.__support.FPUtil.polyeval
45+
libc.src.__support.FPUtil.sqrt
46+
libc.src.__support.macros.optimization
47+
libc.src.__support.macros.properties.types
48+
)
49+
3450
add_header_library(
3551
asin_utils
3652
HDRS

libc/src/__support/math/acos.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ namespace math {
2626

2727
static constexpr double acos(double x) {
2828
using DoubleDouble = fputil::DoubleDouble;
29-
using Float128 = fputil::DyadicFloat<128>;
3029
using namespace asin_internal;
3130
using FPBits = fputil::FPBits<double>;
3231

libc/src/__support/math/acosf16.h

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
//===-- Implementation header for acosf16 -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "src/__support/FPUtil/FEnvImpl.h"
17+
#include "src/__support/FPUtil/FPBits.h"
18+
#include "src/__support/FPUtil/PolyEval.h"
19+
#include "src/__support/FPUtil/cast.h"
20+
#include "src/__support/FPUtil/except_value_utils.h"
21+
#include "src/__support/FPUtil/multiply_add.h"
22+
#include "src/__support/FPUtil/sqrt.h"
23+
#include "src/__support/macros/optimization.h"
24+
25+
namespace LIBC_NAMESPACE_DECL {
26+
27+
namespace math {
28+
29+
static constexpr float16 acosf16(float16 x) {
30+
31+
// Generated by Sollya using the following command:
32+
// > round(pi/2, SG, RN);
33+
// > round(pi, SG, RN);
34+
constexpr float PI_OVER_2 = 0x1.921fb6p0f;
35+
constexpr float PI = 0x1.921fb6p1f;
36+
37+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
38+
constexpr size_t N_EXCEPTS = 2;
39+
40+
constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSF16_EXCEPTS{{
41+
// (input, RZ output, RU offset, RD offset, RN offset)
42+
{0xacaf, 0x3e93, 1, 0, 0},
43+
{0xb874, 0x4052, 1, 0, 1},
44+
}};
45+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
46+
47+
using FPBits = fputil::FPBits<float16>;
48+
FPBits xbits(x);
49+
50+
uint16_t x_u = xbits.uintval();
51+
uint16_t x_abs = x_u & 0x7fff;
52+
uint16_t x_sign = x_u >> 15;
53+
54+
// |x| > 0x1p0, |x| > 1, or x is NaN.
55+
if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
56+
// acosf16(NaN) = NaN
57+
if (xbits.is_nan()) {
58+
if (xbits.is_signaling_nan()) {
59+
fputil::raise_except_if_required(FE_INVALID);
60+
return FPBits::quiet_nan().get_val();
61+
}
62+
63+
return x;
64+
}
65+
66+
// 1 < |x| <= +/-inf
67+
fputil::raise_except_if_required(FE_INVALID);
68+
fputil::set_errno_if_required(EDOM);
69+
70+
return FPBits::quiet_nan().get_val();
71+
}
72+
73+
float xf = x;
74+
75+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
76+
// Handle exceptional values
77+
if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
78+
return r.value();
79+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
80+
81+
// |x| == 0x1p0, x is 1 or -1
82+
// if x is (-)1, return pi, else
83+
// if x is (+)1, return 0
84+
if (LIBC_UNLIKELY(x_abs == 0x3c00))
85+
return fputil::cast<float16>(x_sign ? PI : 0.0f);
86+
87+
float xsq = xf * xf;
88+
89+
// |x| <= 0x1p-1, |x| <= 0.5
90+
if (x_abs <= 0x3800) {
91+
// if x is 0, return pi/2
92+
if (LIBC_UNLIKELY(x_abs == 0))
93+
return fputil::cast<float16>(PI_OVER_2);
94+
95+
// Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
96+
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
97+
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
98+
float interm =
99+
fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
100+
0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
101+
return fputil::cast<float16>(fputil::multiply_add(-xf, interm, PI_OVER_2));
102+
}
103+
104+
// When |x| > 0.5, assume that 0.5 < |x| <= 1
105+
//
106+
// Step-by-step range-reduction proof:
107+
// 1: Let y = asin(x), such that, x = sin(y)
108+
// 2: From complimentary angle identity:
109+
// x = sin(y) = cos(pi/2 - y)
110+
// 3: Let z = pi/2 - y, such that x = cos(z)
111+
// 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
112+
// z = 2A, z/2 = A
113+
// cos(z) = 1 - 2 * sin^2(z/2)
114+
// 5: Make sin(z/2) subject of the formula:
115+
// sin(z/2) = sqrt((1 - cos(z))/2)
116+
// 6: Recall [3]; x = cos(z). Therefore:
117+
// sin(z/2) = sqrt((1 - x)/2)
118+
// 7: Let u = (1 - x)/2
119+
// 8: Therefore:
120+
// asin(sqrt(u)) = z/2
121+
// 2 * asin(sqrt(u)) = z
122+
// 9: Recall [3]; z = pi/2 - y. Therefore:
123+
// y = pi/2 - z
124+
// y = pi/2 - 2 * asin(sqrt(u))
125+
// 10: Recall [1], y = asin(x). Therefore:
126+
// asin(x) = pi/2 - 2 * asin(sqrt(u))
127+
// 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
128+
// Therefore:
129+
// acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
130+
// acos(x) = 2 * asin(sqrt(u))
131+
//
132+
// THE RANGE REDUCTION, HOW?
133+
// 12: Recall [7], u = (1 - x)/2
134+
// 13: Since 0.5 < x <= 1, therefore:
135+
// 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
136+
//
137+
// Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
138+
// Step [11] as `sqrt(u)` is in range.
139+
// When -1 < x <= -0.5, the identity:
140+
// acos(x) = pi - acos(-x)
141+
// allows us to compute for the negative x value (lhs)
142+
// with a positive x value instead (rhs).
143+
144+
float xf_abs = (xf < 0 ? -xf : xf);
145+
float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
146+
float sqrt_u = fputil::sqrt<float>(u);
147+
148+
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
149+
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
150+
float asin_sqrt_u =
151+
sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
152+
0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
153+
154+
return fputil::cast<float16>(
155+
x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u);
156+
}
157+
158+
} // namespace math
159+
160+
} // namespace LIBC_NAMESPACE_DECL
161+
162+
#endif // LIBC_TYPES_HAS_FLOAT16
163+
164+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4040,17 +4040,8 @@ add_entrypoint_object(
40404040
HDRS
40414041
../acosf16.h
40424042
DEPENDS
4043-
libc.hdr.errno_macros
4044-
libc.hdr.fenv_macros
4045-
libc.src.__support.FPUtil.cast
4046-
libc.src.__support.FPUtil.except_value_utils
4047-
libc.src.__support.FPUtil.fenv_impl
4048-
libc.src.__support.FPUtil.fp_bits
4049-
libc.src.__support.FPUtil.multiply_add
4050-
libc.src.__support.FPUtil.polyeval
4051-
libc.src.__support.FPUtil.sqrt
4052-
libc.src.__support.macros.optimization
4053-
libc.src.__support.macros.properties.types
4043+
libc.src.__support.math.acosf16
4044+
libc.src.errno.errno
40544045
)
40554046

40564047
add_entrypoint_object(

0 commit comments

Comments
 (0)