diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index d593ad2141..6ebf36917c 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -25,6 +25,7 @@ - End of sampling report now uses `arviz.InferenceData` internally and avoids storing pointwise log likelihood (see [#3883](https://github.com/pymc-devs/pymc3/pull/3883)). - The multiprocessing start method on MacOS is now set to "forkserver", to avoid crashes (see issue [#3849](https://github.com/pymc-devs/pymc3/issues/3849), solved by [#3919](https://github.com/pymc-devs/pymc3/pull/3919)). +- Forced the `Beta` distribution's `random` method to generate samples that are in the open interval $(0, 1)$, i.e. no value can be equal to zero or equal to one (issue [#3898](https://github.com/pymc-devs/pymc3/issues/3898) fixed by [#3924](https://github.com/pymc-devs/pymc3/pull/3924)). ### Deprecations - Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6. diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index c1c511e835..dfa82dc70b 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -32,6 +32,7 @@ from .dist_math import ( alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow, normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue, + clipped_beta_rvs, ) from .distribution import (Continuous, draw_values, generate_samples) @@ -1290,7 +1291,7 @@ def random(self, point=None, size=None): """ alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - return generate_samples(stats.beta.rvs, alpha, beta, + return generate_samples(clipped_beta_rvs, alpha, beta, dist_shape=self.shape, size=size) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 81c595c921..2a7e5c1bb9 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -19,6 +19,7 @@ ''' import numpy as np import scipy.linalg +import scipy.stats import theano.tensor as tt import theano from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex @@ -33,6 +34,11 @@ f = floatX c = - .5 * np.log(2. * np.pi) +_beta_clip_values = { + dtype: (np.nextafter(0, 1, dtype=dtype), np.nextafter(1, 0, dtype=dtype)) + for dtype in ["float16", "float32", "float64", "float128"] +} + def bound(logp, *conditions, **kwargs): @@ -548,3 +554,43 @@ def incomplete_beta(a, b, value): tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), ps, t)) + + +def clipped_beta_rvs(a, b, size=None, dtype="float64"): + """Draw beta distributed random samples in the open :math:`(0, 1)` interval. + + The samples are generated with ``scipy.stats.beta.rvs``, but any value that + is equal to 0 or 1 will be shifted towards the next floating point in the + interval :math:`[0, 1]`, depending on the floating point precision that is + given by ``dtype``. + + Parameters + ---------- + a : float or array_like of floats + Alpha, strictly positive (>0). + b : float or array_like of floats + Beta, strictly positive (>0). + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``a`` and ``b`` are both scalars. + Otherwise, ``np.broadcast(a, b).size`` samples are drawn. + dtype : str or dtype instance + The floating point precision that the samples should have. This also + determines the value that will be used to shift any samples returned + by the numpy random number generator that are zero or one. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized beta distribution. The scipy + implementation can yield values that are equal to zero or one. We + assume the support of the Beta distribution to be in the open interval + :math:`(0, 1)`, so we shift any sample that is equal to 0 to + ``np.nextafter(0, 1, dtype=dtype)`` and any sample that is equal to 1 + is shifted to ``np.nextafter(1, 0, dtype=dtype)``. + + """ + out = scipy.stats.beta.rvs(a, b, size=size).astype(dtype) + lower, upper = _beta_clip_values[dtype] + return np.maximum(np.minimum(out, upper), lower) \ No newline at end of file diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py index b55e9792bc..f54f91bc2e 100644 --- a/pymc3/tests/test_dist_math.py +++ b/pymc3/tests/test_dist_math.py @@ -24,7 +24,9 @@ from ..theanof import floatX from ..distributions import Discrete from ..distributions.dist_math import ( - bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e) + bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e, + clipped_beta_rvs, +) def test_bound(): @@ -216,3 +218,11 @@ def test_grad(self): utt.verify_grad(i0e, [-2.]) utt.verify_grad(i0e, [[0.5, -2.]]) utt.verify_grad(i0e, [[[0.5, -2.]]]) + + +@pytest.mark.parametrize("dtype", ["float16", "float32", "float64", "float128"]) +def test_clipped_beta_rvs(dtype): + # Verify that the samples drawn from the beta distribution are never + # equal to zero or one (issue #3898) + values = clipped_beta_rvs(0.01, 0.01, size=1000000, dtype=dtype) + assert not (np.any(values == 0) or np.any(values == 1)) \ No newline at end of file diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 5da7c4ce5e..17e7c29140 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -22,6 +22,7 @@ import theano import pymc3 as pm +from pymc3.distributions.dist_math import clipped_beta_rvs from pymc3.distributions.distribution import (draw_values, _DrawValuesContext, _DrawValuesContextBlocker) @@ -548,7 +549,7 @@ def ref_rand(size, mu, lam, alpha): def test_beta(self): def ref_rand(size, alpha, beta): - return st.beta.rvs(a=alpha, b=beta, size=size) + return clipped_beta_rvs(a=alpha, b=beta, size=size) pymc3_random(pm.Beta, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand) def test_exponential(self):