Skip to content

Commit

Permalink
BUG: stats: Reformulate loggamma._rvs to handle c << 1. (scipy#13349)
Browse files Browse the repository at this point in the history
* BUG: stats: Reformulate loggamma._rvs to handle c << 1.

Closes scipygh-11094.

Several tests in test_morestats.py call loggamma.rvs to generate
test data.  These tests have hardcoded expected results that
depend on the values in the input data.  Because the stream
of variates generated by loggamma.rvs has changed, those tests
will fail if the input data is generated by loggamma.rvs.  Instead,
the data is now generated by a helper function that takes the log of
random variates from `stats.gamma.rvs`, which reproduces the old stream.

Co-authored-by: Matt Haberland <[email protected]>
  • Loading branch information
WarrenWeckesser and mdhaber authored Aug 18, 2022
1 parent bf9e262 commit 2e871e9
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 15 deletions.
14 changes: 13 additions & 1 deletion scipy/stats/_continuous_distns.py
Original file line number Diff line number Diff line change
Expand Up @@ -5355,11 +5355,23 @@ class loggamma_gen(rv_continuous):
%(example)s
"""

def _shape_info(self):
    # Describe the single shape parameter c: strictly positive and
    # unbounded above (both endpoints excluded), not integer-valued.
    c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
    return [c_info]

def _rvs(self, c, size=None, random_state=None):
return np.log(random_state.gamma(c, size=size))
# Use the property of the gamma distribution Gamma(c)
# Gamma(c) ~ Gamma(c + 1)*U**(1/c),
# where U is uniform on [0, 1]. (See, e.g.,
# G. Marsaglia and W.W. Tsang, "A simple method for generating gamma
# variables", https://doi.org/10.1145/358407.358414)
# So
# log(Gamma(c)) ~ log(Gamma(c + 1)) + log(U)/c
# Generating a sample with this formulation is a bit slower
# than the more obvious log(Gamma(c)), but it avoids loss
# of precision when c << 1.
return (np.log(random_state.gamma(c + 1, size=size))
+ np.log(random_state.uniform(size=size))/c)

def _pdf(self, x, c):
# loggamma.pdf(x, c) = exp(c*x-exp(x)) / gamma(c)
Expand Down
6 changes: 3 additions & 3 deletions scipy/stats/_morestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,11 +1182,11 @@ def boxcox_normmax(x, brack=None, method='pearsonr', optimizer=None):
>>> lmax_pearsonr = stats.boxcox_normmax(x)
>>> lmax_mle
1.4613865614008015
2.217563431465757
>>> lmax_pearsonr
1.6685004886804342
2.238318660200961
>>> stats.boxcox_normmax(x, method='all')
array([1.66850049, 1.46138656])
array([2.23831866, 2.21756343])
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111)
Expand Down
2 changes: 1 addition & 1 deletion scipy/stats/tests/test_continuous_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def test_rvs_broadcast(dist, shape_args):
shape_only = dist in ['argus', 'betaprime', 'dgamma', 'dweibull',
'exponnorm', 'genhyperbolic', 'geninvgauss',
'levy_stable', 'nct', 'norminvgauss', 'rice',
'skewnorm', 'semicircular', 'gennorm']
'skewnorm', 'semicircular', 'gennorm', 'loggamma']

distfunc = getattr(stats, dist)
loc = np.zeros(2)
Expand Down
14 changes: 14 additions & 0 deletions scipy/stats/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,20 @@ def test_stats(self):
assert_array_almost_equal(computed, [mean, var, skew, kurt],
decimal=4)

@pytest.mark.parametrize('c', [0.1, 0.001])
def test_rvs(self, c):
    # Regression test for gh-11094: before the fix, c=0.001 produced
    # many -inf values.
    sample = stats.loggamma.rvs(c, size=100000)
    assert np.isfinite(sample).all()
    # Crude statistical sanity check: roughly half of the sample
    # should fall below the distribution median.  Verify via a
    # binomial-proportion confidence interval.
    n_below = np.count_nonzero(sample < stats.loggamma.median(c))
    interval = stats.binomtest(n_below, len(sample)).proportion_ci(
        confidence_level=0.999)
    assert interval.low < 0.5 < interval.high


class TestLogistic:
# gh-6226
Expand Down
26 changes: 16 additions & 10 deletions scipy/stats/tests/test_morestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]


# The loggamma RVS stream is changing due to gh-13349; this version
# preserves the old stream so that tests don't change.
def _old_loggamma_rvs(*args, **kwargs):
return np.log(stats.gamma.rvs(*args, **kwargs))


class TestBayes_mvs:
def test_basic(self):
# Expected values in this test simply taken from the function. For
Expand Down Expand Up @@ -1643,7 +1649,7 @@ def test_bad_arg(self):

class TestPpccPlot:
def setup_method(self):
    # Fixed-seed fixture.  Uses the pre-gh-13349 loggamma stream so the
    # hardcoded expected values in this class remain valid.  (The stale
    # duplicate assignment via stats.loggamma.rvs was a dead store and
    # has been removed.)
    self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5

def test_basic(self):
N = 5
Expand Down Expand Up @@ -1825,7 +1831,7 @@ def test_gh_6873(self):
class TestBoxcox:

def test_fixed_lmbda(self):
x = stats.loggamma.rvs(5, size=50, random_state=12345) + 5
x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
xt = stats.boxcox(x, lmbda=1)
assert_allclose(xt, x - 1)
xt = stats.boxcox(x, lmbda=-1)
Expand Down Expand Up @@ -1854,7 +1860,7 @@ def test_lmbda_None(self):

def test_alpha(self):
rng = np.random.RandomState(1234)
x = stats.loggamma.rvs(5, size=50, random_state=rng) + 5
x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5

# Some regular values for alpha, on a small sample size
_, _, interval = stats.boxcox(x, alpha=0.75)
Expand All @@ -1863,7 +1869,7 @@ def test_alpha(self):
assert_allclose(interval, [1.2138178554857557, 8.209033272375663])

# Try some extreme values, see we don't hit the N=500 limit
x = stats.loggamma.rvs(7, size=500, random_state=rng) + 15
x = _old_loggamma_rvs(7, size=500, random_state=rng) + 15
_, _, interval = stats.boxcox(x, alpha=0.001)
assert_allclose(interval, [0.3988867, 11.40553131])
_, _, interval = stats.boxcox(x, alpha=0.999)
Expand Down Expand Up @@ -1941,7 +1947,7 @@ def optimizer(fun):

class TestBoxcoxNormmax:
def setup_method(self):
    # Fixed-seed fixture.  Uses the pre-gh-13349 loggamma stream so the
    # hardcoded expected values in this class remain valid.  (The stale
    # duplicate assignment via stats.loggamma.rvs was a dead store and
    # has been removed.)
    self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5

def test_pearsonr(self):
maxlog = stats.boxcox_normmax(self.x)
Expand Down Expand Up @@ -2013,7 +2019,7 @@ def test_user_defined_optimizer_and_brack_raises_error(self):

class TestBoxcoxNormplot:
def setup_method(self):
    # Fixed-seed fixture.  Uses the pre-gh-13349 loggamma stream so the
    # hardcoded expected values in this class remain valid.  (The stale
    # duplicate assignment via stats.loggamma.rvs was a dead store and
    # has been removed.)
    self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5

def test_basic(self):
N = 5
Expand Down Expand Up @@ -2072,7 +2078,7 @@ def test_fixed_lmbda(self):
rng = np.random.RandomState(12345)

# Test positive input
x = stats.loggamma.rvs(5, size=50, random_state=rng) + 5
x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5
assert np.all(x > 0)
xt = stats.yeojohnson(x, lmbda=1)
assert_allclose(xt, x)
Expand All @@ -2084,7 +2090,7 @@ def test_fixed_lmbda(self):
assert_allclose(xt, x)

# Test negative input
x = stats.loggamma.rvs(5, size=50, random_state=rng) - 5
x = _old_loggamma_rvs(5, size=50, random_state=rng) - 5
assert np.all(x < 0)
xt = stats.yeojohnson(x, lmbda=2)
assert_allclose(xt, -np.log(-x + 1))
Expand All @@ -2094,7 +2100,7 @@ def test_fixed_lmbda(self):
assert_allclose(xt, 1 / (-x + 1) - 1)

# test both positive and negative input
x = stats.loggamma.rvs(5, size=50, random_state=rng) - 2
x = _old_loggamma_rvs(5, size=50, random_state=rng) - 2
assert not np.all(x < 0)
assert not np.all(x >= 0)
pos = x >= 0
Expand Down Expand Up @@ -2195,7 +2201,7 @@ def test_input_high_variance(self):

class TestYeojohnsonNormmax:
def setup_method(self):
    # Fixed-seed fixture.  Uses the pre-gh-13349 loggamma stream so the
    # hardcoded expected values in this class remain valid.  (The stale
    # duplicate assignment via stats.loggamma.rvs was a dead store and
    # has been removed.)
    self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5

def test_mle(self):
maxlog = stats.yeojohnson_normmax(self.x)
Expand Down

0 comments on commit 2e871e9

Please sign in to comment.