From 63780b64c97b1c03e360b76ea6b103dc4ed40ee7 Mon Sep 17 00:00:00 2001
From: Josh Wilson <person142@users.noreply.github.com>
Date: Thu, 31 Oct 2019 07:03:12 -0700
Subject: [PATCH] BUG: prevent underflow/overflow when finding roots in IRR

NumPy's roots function uses the companion matrix to find polynomial
roots. In the process it makes the polynomial monic by dividing by the
leading coefficient, which can cause overflow/underflow. This can be
avoided by working with a scaled version of the companion matrix
instead. Since scaling the matrix simply scales the eigenvalues
(i.e. the roots of the polynomial in this case), the original roots
can easily be recovered.

Closes gh-15.
---
 numpy_financial/_financial.py           |  46 +++++++++--
 numpy_financial/tests/test_financial.py | 104 ++++++++++++++++++------
 2 files changed, 118 insertions(+), 32 deletions(-)

diff --git a/numpy_financial/_financial.py b/numpy_financial/_financial.py
index ce8abaa..bcfd01e 100644
--- a/numpy_financial/_financial.py
+++ b/numpy_financial/_financial.py
@@ -666,6 +666,35 @@ def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
         return rn
 
 
+def _roots(p):
+    """Modified version of NumPy's roots function.
+
+    NumPy's roots uses the companion matrix method, which divides by
+    p[0]. This can cause overflows/underflows. Instead form a
+    modified companion matrix that is scaled by 2^c * p[0], where the
+    exponent c is chosen to balance the magnitudes of the
+    coefficients. Since scaling the matrix just scales the
+    eigenvalues, we can remove the scaling at the end.
+
+    Scaling by a power of 2 is chosen to avoid rounding errors.
+
+    """
+    _, e = np.frexp(p)
+    # Balance the most extreme exponents e_max and e_min by solving
+    # the equation
+    #
+    # |c + e_max| = |c + e_min|.
+    #
+    # Truncate c to an integer so the scaling is by an exact power of 2.
+    c = int(-0.5 * (np.max(e) + np.min(e)))
+    p = np.ldexp(p, c)
+
+    A = np.diag(np.full(p.size - 2, p[0]), k=-1)
+    A[0,:] = -p[1:]
+    eigenvalues = np.linalg.eigvals(A)
+    return eigenvalues / p[0]
+
+
 def irr(values):
     """
     Return the Internal Rate of Return (IRR).
@@ -729,12 +758,17 @@ def irr(values):
     0.0886
 
     """
-    # `np.roots` call is why this function does not support Decimal type.
-    #
-    # Ultimately Decimal support needs to be added to np.roots, which has
-    # greater implications on the entire linear algebra module and how it does
-    # eigenvalue computations.
-    res = np.roots(values[::-1])
+    values = np.atleast_1d(values)
+    if values.ndim != 1:
+        raise ValueError("Cashflows must be a rank-1 array")
+
+    # Strip leading and trailing zeros. Since we only care about
+    # positive roots we can neglect roots at zero.
+    non_zero = np.nonzero(np.ravel(values))[0]
+    values = values[int(non_zero[0]):int(non_zero[-1])+1]
+
+    res = _roots(values[::-1])
+
     mask = (res.imag == 0) & (res.real > 0)
     if not mask.any():
         return np.nan
diff --git a/numpy_financial/tests/test_financial.py b/numpy_financial/tests/test_financial.py
index 8070ffe..2166f06 100644
--- a/numpy_financial/tests/test_financial.py
+++ b/numpy_financial/tests/test_financial.py
@@ -13,13 +13,6 @@
 
 
 class TestFinancial(object):
-    def test_npv_irr_congruence(self):
-        # IRR is defined as the rate required for the present value of a
-        # a series of cashflows to be zero i.e. NPV(IRR(x), x) = 0
-        cashflows = numpy.array([-40000, 5000, 8000, 12000, 30000])
-        assert_allclose(npf.npv(npf.irr(cashflows), cashflows), 0,
-                        atol=1e-10, rtol=0)
-
     def test_rate(self):
         assert_almost_equal(npf.rate(10, 0, -3500, 10000), 0.1107, 4)
 
@@ -28,25 +21,6 @@ def test_rate_decimal(self):
                         Decimal('10000'))
         assert_equal(Decimal('0.1106908537142689284704528100'), rate)
 
-    def test_irr(self):
-        v = [-150000, 15000, 25000, 35000, 45000, 60000]
-        assert_almost_equal(npf.irr(v), 0.0524, 2)
-        v = [-100, 0, 0, 74]
-        assert_almost_equal(npf.irr(v), -0.0955, 2)
-        v = [-100, 39, 59, 55, 20]
-        assert_almost_equal(npf.irr(v), 0.28095, 2)
-        v = [-100, 100, 0, -7]
-        assert_almost_equal(npf.irr(v), -0.0833, 2)
-        v = [-100, 100, 0, 7]
-        assert_almost_equal(npf.irr(v), 0.06206, 2)
-        v = [-5, 10.5, 1, -8, 1]
-        assert_almost_equal(npf.irr(v), 0.0886, 2)
-
-        # Test that if there is no solution then npf.irr returns nan
-        # Fixes gh-6744
-        v = [-1, -2, -3]
-        assert_equal(npf.irr(v), numpy.nan)
-
     def test_pv(self):
         assert_almost_equal(npf.pv(0.07, 20, 12000, 0), -127128.17, 2)
 
@@ -514,3 +488,81 @@ def test_some_rates_zero(self):
             [-500, -610.51],  # Computed using Google Sheet's FV
             rtol=1e-10,
         )
+
+
+class TestIrr:
+    def test_npv_irr_congruence(self):
+        # IRR is defined as the rate required for the present value of
+        # a series of cashflows to be zero, so we should have
+        #
+        # NPV(IRR(x), x) = 0.
+        cashflows = numpy.array([-40000, 5000, 8000, 12000, 30000])
+        assert_allclose(
+            npf.npv(npf.irr(cashflows), cashflows),
+            0,
+            atol=1e-10,
+            rtol=0,
+        )
+
+    @pytest.mark.parametrize('v, desired', [
+        ([-150000, 15000, 25000, 35000, 45000, 60000], 0.0524),
+        ([-100, 0, 0, 74], -0.0955),
+        ([-100, 39, 59, 55, 20], 0.28095),
+        ([-100, 100, 0, -7], -0.0833),
+        ([-100, 100, 0, 7], 0.06206),
+        ([-5, 10.5, 1, -8, 1], 0.0886),
+    ])
+    def test_basic_values(self, v, desired):
+        assert_almost_equal(npf.irr(v), desired, decimal=2)
+
+    def test_trailing_zeros(self):
+        assert_almost_equal(
+            npf.irr([-5, 10.5, 1, -8, 1, 0, 0, 0]),
+            0.0886,
+            decimal=2,
+        )
+
+    def test_numpy_gh_6744(self):
+        # Test that if there is no solution then npf.irr returns nan.
+        v = [-1, -2, -3]
+        assert numpy.isnan(npf.irr(v))
+
+    def test_gh_15(self):
+        v = [
+            -3000.0,
+            2.3926932267015667e-07,
+            4.1672087103345505e-16,
+            5.3965110036378706e-25,
+            5.1962551071806174e-34,
+            3.7202955645436402e-43,
+            1.9804961711632469e-52,
+            7.8393517651814181e-62,
+            2.3072565113911438e-71,
+            5.0491839233308912e-81,
+            8.2159177668499263e-91,
+            9.9403244366963527e-101,
+            8.942410813633967e-111,
+            5.9816122646481191e-121,
+            2.9750309031844241e-131,
+            1.1002067043497954e-141,
+            3.0252876563518021e-152,
+            6.1854121948207909e-163,
+            9.4032980015353301e-174,
+            1.0629218520017728e-184,
+            8.9337141847171845e-196,
+            5.5830607698467935e-207,
+            2.5943122036622652e-218,
+            8.9635842466507006e-230,
+            2.3027710094332358e-241,
+            4.3987510596745562e-253,
+            6.2476630372575209e-265,
+            6.598046841695288e-277,
+            5.1811095266842017e-289,
+            3.0250999925830644e-301,
+            1.3133070599585015e-313,
+        ]
+        result = npf.irr(v)
+        assert numpy.isfinite(result)
+        # Very rough approximation taken from the issue.
+        desired = -0.9999999990596069
+        assert_allclose(result, desired, rtol=1e-9)