Skip to content

Commit 6939b18

Browse files
committed
[libc][math] Refactor acosf implementation to header-only in src/__support/math folder.
1 parent cfddb40 commit 6939b18

File tree

12 files changed

+323
-285
lines changed

12 files changed

+323
-285
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "libc_common.h"
1313

1414
#include "math/acos.h"
15+
#include "math/acosf.h"
1516
#include "math/exp.h"
1617
#include "math/exp10.h"
1718
#include "math/exp10f.h"

libc/shared/math/acosf.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===-- Shared acosf function -----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_ACOSF_H
10+
#define LLVM_LIBC_SHARED_MATH_ACOSF_H
11+
12+
#include "shared/libc_common.h"
13+
#include "src/__support/math/acosf.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
namespace shared {
17+
18+
using math::acosf;
19+
20+
} // namespace shared
21+
} // namespace LIBC_NAMESPACE_DECL
22+
23+
#endif // LLVM_LIBC_SHARED_MATH_ACOSF_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,20 @@ add_header_library(
1717
libc.src.__support.macros.properties.cpu_features
1818
)
1919

20+
add_header_library(
21+
acosf
22+
HDRS
23+
acosf.h
24+
DEPENDS
25+
.inv_trigf_utils
26+
libc.src.__support.FPUtil.except_value_utils
27+
libc.src.__support.FPUtil.fp_bits
28+
libc.src.__support.FPUtil.multiply_add
29+
libc.src.__support.FPUtil.polyeval
30+
libc.src.__support.FPUtil.sqrt
31+
libc.src.__support.macros.optimization
32+
)
33+
2034
add_header_library(
2135
asin_utils
2236
HDRS
@@ -98,6 +112,16 @@ add_header_library(
98112
libc.src.__support.FPUtil.manipulation_functions
99113
)
100114

115+
add_header_library(
116+
inv_trigf_utils
117+
HDRS
118+
inv_trigf_utils.h
119+
DEPENDS
120+
libc.src.__support.FPUtil.multiply_add
121+
libc.src.__support.FPUtil.polyeval
122+
libc.src.__support.common
123+
)
124+
101125
add_header_library(
102126
frexpf16
103127
HDRS

libc/src/__support/math/acosf.h

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//===-- Implementation header for acosf -------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF_H
11+
12+
#include "inv_trigf_utils.h"
13+
#include "src/__support/FPUtil/FEnvImpl.h"
14+
#include "src/__support/FPUtil/FPBits.h"
15+
#include "src/__support/FPUtil/except_value_utils.h"
16+
#include "src/__support/FPUtil/multiply_add.h"
17+
#include "src/__support/FPUtil/sqrt.h"
18+
#include "src/__support/macros/config.h"
19+
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
20+
21+
namespace LIBC_NAMESPACE_DECL {
22+
23+
namespace math {
24+
25+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
26+
static constexpr size_t N_EXCEPTS = 4;
27+
28+
// Exceptional values when |x| <= 0.5
29+
static constexpr fputil::ExceptValues<float, N_EXCEPTS> ACOSF_EXCEPTS = {{
30+
// (inputs, RZ output, RU offset, RD offset, RN offset)
31+
// x = 0x1.110b46p-26, acosf(x) = 0x1.921fb4p0 (RZ)
32+
{0x328885a3, 0x3fc90fda, 1, 0, 1},
33+
// x = -0x1.110b46p-26, acosf(x) = 0x1.921fb4p0 (RZ)
34+
{0xb28885a3, 0x3fc90fda, 1, 0, 1},
35+
// x = 0x1.04c444p-12, acosf(x) = 0x1.920f68p0 (RZ)
36+
{0x39826222, 0x3fc907b4, 1, 0, 1},
37+
// x = -0x1.04c444p-12, acosf(x) = 0x1.923p0 (RZ)
38+
{0xb9826222, 0x3fc91800, 1, 0, 1},
39+
}};
40+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
41+
42+
static constexpr float acosf(float x) {
43+
using FPBits = typename fputil::FPBits<float>;
44+
45+
FPBits xbits(x);
46+
uint32_t x_uint = xbits.uintval();
47+
uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
48+
uint32_t x_sign = x_uint >> 31;
49+
50+
// |x| <= 0.5
51+
if (LIBC_UNLIKELY(x_abs <= 0x3f00'0000U)) {
52+
// |x| < 0x1p-10
53+
if (LIBC_UNLIKELY(x_abs < 0x3a80'0000U)) {
54+
// When |x| < 2^-10, we use the following approximation:
55+
// acos(x) = pi/2 - asin(x)
56+
// ~ pi/2 - x - x^3 / 6
57+
58+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
59+
// Check for exceptional values
60+
if (auto r = ACOSF_EXCEPTS.lookup(x_uint); LIBC_UNLIKELY(r.has_value()))
61+
return r.value();
62+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
63+
64+
double xd = static_cast<double>(x);
65+
return static_cast<float>(fputil::multiply_add(
66+
-0x1.5555555555555p-3 * xd, xd * xd, M_MATH_PI_2 - xd));
67+
}
68+
69+
// For |x| <= 0.5, we approximate acosf(x) by:
70+
// acos(x) = pi/2 - asin(x) = pi/2 - x * P(x^2)
71+
// Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
72+
// asin(x)/x on [0, 0.5] generated by Sollya with:
73+
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
74+
// [|1, D...|], [0, 0.5]);
75+
double xd = static_cast<double>(x);
76+
double xsq = xd * xd;
77+
double x3 = xd * xsq;
78+
double r = asin_eval(xsq);
79+
return static_cast<float>(fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd));
80+
}
81+
82+
// |x| >= 1, return 0, 2pi, or NaNs.
83+
if (LIBC_UNLIKELY(x_abs >= 0x3f80'0000U)) {
84+
if (x_abs == 0x3f80'0000U)
85+
return x_sign ? /* x == -1.0f */ fputil::round_result_slightly_down(
86+
0x1.921fb6p+1f)
87+
: /* x == 1.0f */ 0.0f;
88+
89+
if (xbits.is_signaling_nan()) {
90+
fputil::raise_except_if_required(FE_INVALID);
91+
return FPBits::quiet_nan().get_val();
92+
}
93+
94+
// |x| <= +/-inf
95+
if (x_abs <= 0x7f80'0000U) {
96+
fputil::set_errno_if_required(EDOM);
97+
fputil::raise_except_if_required(FE_INVALID);
98+
}
99+
100+
return x + FPBits::quiet_nan().get_val();
101+
}
102+
103+
// When 0.5 < |x| < 1, we perform range reduction as follow:
104+
//
105+
// Assume further that 0.5 < x <= 1, and let:
106+
// y = acos(x)
107+
// We use the double angle formula:
108+
// x = cos(y) = 1 - 2 sin^2(y/2)
109+
// So:
110+
// sin(y/2) = sqrt( (1 - x)/2 )
111+
// And hence:
112+
// y = 2 * asin( sqrt( (1 - x)/2 ) )
113+
// Let u = (1 - x)/2, then
114+
// acos(x) = 2 * asin( sqrt(u) )
115+
// Moreover, since 0.5 < x <= 1,
116+
// 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
117+
// And hence we can reuse the same polynomial approximation of asin(x) when
118+
// |x| <= 0.5:
119+
// acos(x) ~ 2 * sqrt(u) * P(u).
120+
//
121+
// When -1 < x <= -0.5, we use the identity:
122+
// acos(x) = pi - acos(-x)
123+
// which is reduced to the postive case.
124+
125+
xbits.set_sign(Sign::POS);
126+
double xd = static_cast<double>(xbits.get_val());
127+
double u = fputil::multiply_add(-0.5, xd, 0.5);
128+
double cv = 2 * fputil::sqrt<double>(u);
129+
130+
double r3 = asin_eval(u);
131+
double r = fputil::multiply_add(cv * u, r3, cv);
132+
return static_cast<float>(x_sign ? M_MATH_PI - r : r);
133+
}
134+
135+
} // namespace math
136+
137+
} // namespace LIBC_NAMESPACE_DECL
138+
139+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H

libc/src/math/generic/inv_trigf_utils.cpp renamed to libc/src/__support/math/inv_trigf_utils.h

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,25 @@
1-
//===-- Single-precision general exp/log functions ------------------------===//
1+
//===-- Single-precision general inverse trigonometric functions ----------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include "inv_trigf_utils.h"
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_INV_TRIGF_UTILS_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_INV_TRIGF_UTILS_H
11+
12+
#include "src/__support/FPUtil/PolyEval.h"
13+
#include "src/__support/FPUtil/multiply_add.h"
14+
#include "src/__support/common.h"
1015
#include "src/__support/macros/config.h"
1116

1217
namespace LIBC_NAMESPACE_DECL {
1318

19+
// PI and PI / 2
20+
static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
21+
static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
22+
1423
// Polynomial approximation for 0 <= x <= 1:
1524
// atan(x) ~ atan((i/16) + (x - (i/16)) * Q(x - i/16)
1625
// = P(x - i/16)
@@ -29,7 +38,7 @@ namespace LIBC_NAMESPACE_DECL {
2938
// Notice that degree-7 is good enough for atanf, but degree-8 helps reduce the
3039
// error bounds for atan2f's fast pass 16 times, and it does not affect the
3140
// performance of atanf much.
32-
double ATAN_COEFFS[17][9] = {
41+
static constexpr double ATAN_COEFFS[17][9] = {
3342
{0.0, 1.0, 0x1.3f8d76d26d61bp-47, -0x1.5555555574cd8p-2,
3443
0x1.0dde5d06878eap-29, 0x1.99997738acc77p-3, 0x1.2c43eac9797cap-16,
3544
-0x1.25fb020007dbdp-3, 0x1.c1b6c31d7b0aep-7},
@@ -83,4 +92,89 @@ double ATAN_COEFFS[17][9] = {
8392
0x1.555e31a1e15e9p-6, -0x1.245240d65e629p-7, -0x1.fa9ba66478903p-11},
8493
};
8594

95+
// Look-up table for atan(k/16) with k = 0..16.
96+
static constexpr double ATAN_K_OVER_16[17] = {
97+
0.0,
98+
0x1.ff55bb72cfdeap-5,
99+
0x1.fd5ba9aac2f6ep-4,
100+
0x1.7b97b4bce5b02p-3,
101+
0x1.f5b75f92c80ddp-3,
102+
0x1.362773707ebccp-2,
103+
0x1.6f61941e4def1p-2,
104+
0x1.a64eec3cc23fdp-2,
105+
0x1.dac670561bb4fp-2,
106+
0x1.0657e94db30dp-1,
107+
0x1.1e00babdefeb4p-1,
108+
0x1.345f01cce37bbp-1,
109+
0x1.4978fa3269ee1p-1,
110+
0x1.5d58987169b18p-1,
111+
0x1.700a7c5784634p-1,
112+
0x1.819d0b7158a4dp-1,
113+
0x1.921fb54442d18p-1,
114+
};
115+
116+
// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
117+
// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
118+
LIBC_INLINE static constexpr double atan_eval(double x, unsigned i) {
119+
double x2 = x * x;
120+
121+
double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
122+
double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]);
123+
double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]);
124+
double c3 = fputil::multiply_add(x, ATAN_COEFFS[i][8], ATAN_COEFFS[i][7]);
125+
126+
double x4 = x2 * x2;
127+
double d1 = fputil::multiply_add(x2, c1, c0);
128+
double d2 = fputil::multiply_add(x2, c3, c2);
129+
double p = fputil::multiply_add(x4, d2, d1);
130+
return p;
131+
}
132+
133+
// Evaluate atan without big lookup table.
134+
// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
135+
// = atan((n - d * k/16)) / (d + n * k/16))
136+
// So we let q = (n - d * k/16) / (d + n * k/16),
137+
// and approximate with Taylor polynomial:
138+
// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
139+
LIBC_INLINE static constexpr double atan_eval_no_table(double num, double den,
140+
double k_over_16) {
141+
double num_r = fputil::multiply_add(den, -k_over_16, num);
142+
double den_r = fputil::multiply_add(num, k_over_16, den);
143+
double q = num_r / den_r;
144+
145+
constexpr double ATAN_TAYLOR[] = {
146+
-0x1.5555555555555p-2,
147+
0x1.999999999999ap-3,
148+
-0x1.2492492492492p-3,
149+
0x1.c71c71c71c71cp-4,
150+
};
151+
double q2 = q * q;
152+
double q3 = q2 * q;
153+
double q4 = q2 * q2;
154+
double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
155+
double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
156+
double d = fputil::multiply_add(q4, c1, c0);
157+
return fputil::multiply_add(q3, d, q);
158+
}
159+
160+
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
161+
// [|1, D...|], [0, 0.5]);
162+
static constexpr double ASIN_COEFFS[10] = {
163+
0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
164+
0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
165+
0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
166+
0x1.02311ecf99c28p-5};
167+
168+
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
169+
LIBC_INLINE static constexpr double asin_eval(double xsq) {
170+
double x4 = xsq * xsq;
171+
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
172+
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
173+
double r2 = fputil::polyeval(x4, ASIN_COEFFS[1], ASIN_COEFFS[3],
174+
ASIN_COEFFS[5], ASIN_COEFFS[7], ASIN_COEFFS[9]);
175+
return fputil::multiply_add(xsq, r2, r1);
176+
}
177+
86178
} // namespace LIBC_NAMESPACE_DECL
179+
180+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_INV_TRIGF_UTILS_H

0 commit comments

Comments
 (0)