# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Vectorized Exponential distribution class, directly using LinearOperator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.distributions.python.ops import bijectors
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import exponential
from tensorflow.python.ops.distributions import transformed_distribution
from tensorflow.python.ops.linalg import linalg

__all__ = ["VectorExponentialLinearOperator"]

_mvn_sample_note = """
`value` is a batch vector with compatible shape if `value` is a `Tensor` whose
shape can be broadcast up to either:

```python
self.batch_shape + self.event_shape
```

or

```python
[M1, ..., Mm] + self.batch_shape + self.event_shape
```

"""


class VectorExponentialLinearOperator(
    transformed_distribution.TransformedDistribution):
  """The vectorization of the Exponential distribution on `R^k`.

  The vector exponential distribution is defined over a subset of `R^k`, and
  parameterized by a (batch of) length-`k` `loc` vector and a (batch of)
  `k x k` `scale` matrix: `covariance = scale @ scale.T`, where `@` denotes
  matrix-multiplication.

  #### Mathematical Details

  The probability density function (pdf) is

  ```none
  pdf(y; loc, scale) = exp(-||x||_1) / Z,  for y in S(loc, scale),
  x = inv(scale) @ (y - loc),
  Z = |det(scale)|,
  ```

  where:

  * `loc` is a vector in `R^k`,
  * `scale` is a linear operator in `R^{k x k}`, `cov = scale @ scale.T`,
  * `S = {loc + scale @ x : x in R^k, x_1 > 0, ..., x_k > 0}`, is an image of
    the positive half-space,
  * `||x||_1` denotes the `l1` norm of `x`, `sum_i |x_i|`,
  * `Z` denotes the normalization constant.

  The VectorExponential distribution is a member of the [location-scale
  family](https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
  constructed as,

  ```none
  X = (X_1, ..., X_k), each X_i ~ Exponential(rate=1)
  Y = (Y_1, ..., Y_k) = scale @ X + loc
  ```

  #### About `VectorExponential` and `Vector` distributions in TensorFlow.

  The `VectorExponential` is a non-standard distribution that has useful
  properties.

  The marginals `Y_1, ..., Y_k` are *not* Exponential random variables, due to
  the fact that the sum of Exponential random variables is not Exponential.

  Instead, `Y` is a vector whose components are linear combinations of
  Exponential random variables.  Thus, `Y` lives in the vector space generated
  by `vectors` of Exponential distributions.
  This allows the user to choose the mean and covariance (by setting `loc` and
  `scale`), while preserving some properties of the Exponential distribution.
  In particular, the tails of `Y_i` will be (up to polynomial factors)
  exponentially decaying.

  To see this last statement, note that the pdf of `Y_i` is the convolution of
  the pdfs of `k` independent Exponential random variables.  One can then show
  by induction that distributions with exponential (up to polynomial factors)
  tails are closed under convolution.


  #### Examples

  ```python
  tfd = tf.contrib.distributions

  # Initialize a single 2-variate VectorExponential, supported on
  # {(x, y) in R^2 : x > 0, y > 0}.
  mat = [[1.0, 0.1],
         [0.1, 1.0]]

  vex = tfd.VectorExponentialLinearOperator(
      scale=tf.linalg.LinearOperatorFullMatrix(mat))

  # Compute the pdf of an `R^2` observation; return a scalar.
  vex.prob([1., 2.]).eval()  # shape: []

  # Initialize a 2-batch of 3-variate Vector Exponentials.
  mu = [[1., 2, 3],
        [1., 0, 0]]  # shape: [2, 3]
  scale_diag = [[1., 2, 3],
                [0.5, 1, 1.5]]  # shape: [2, 3]

  vex = tfd.VectorExponentialLinearOperator(
      loc=mu,
      scale=tf.linalg.LinearOperatorDiag(scale_diag))

  # Compute the pdf of two `R^3` observations; return a length-2 vector.
  x = [[1.9, 2.2, 3.1],
       [10., 1.0, 9.0]]  # shape: [2, 3]
  vex.prob(x).eval()  # shape: [2]
  ```

  """

  def __init__(self,
               loc=None,
               scale=None,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorExponentialLinearOperator"):
    """Construct Vector Exponential distribution supported on a subset of `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean, mode, etc.) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`.
    """
    parameters = locals()
    if scale is None:
      raise ValueError("Missing required `scale` parameter.")
    if not scale.dtype.is_floating:
      raise TypeError("`scale` parameter must have floating-point dtype.")

    with ops.name_scope(name, values=[loc] + scale.graph_parents):
      # Since expand_dims doesn't preserve constant-ness, we obtain the
      # non-dynamic value if possible.
      loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, scale)

      super(VectorExponentialLinearOperator, self).__init__(
          distribution=exponential.Exponential(rate=array_ops.ones(
              [], dtype=scale.dtype), allow_nan_stats=allow_nan_stats),
          bijector=bijectors.AffineLinearOperator(
              shift=loc, scale=scale, validate_args=validate_args),
          batch_shape=batch_shape,
          event_shape=event_shape,
          validate_args=validate_args,
          name=name)
      self._parameters = parameters

  @property
  def loc(self):
    """The `loc` `Tensor` in `Y = scale @ X + loc`."""
    return self.bijector.shift

  @property
  def scale(self):
    """The `scale` `LinearOperator` in `Y = scale @ X + loc`."""
    return self.bijector.scale

  @distribution_util.AppendDocstring(_mvn_sample_note)
  def _log_prob(self, x):
    return super(VectorExponentialLinearOperator, self)._log_prob(x)

  @distribution_util.AppendDocstring(_mvn_sample_note)
  def _prob(self, x):
    return super(VectorExponentialLinearOperator, self)._prob(x)

  def _mean(self):
    # Let
    #   W = (w1, ..., wk), with wj ~ iid Exponential(rate=1).
    # Then this distribution is
    #   X = loc + L W,
    # and since E[wj] = 1,
    #   E[X] = loc + L 1, where 1 is the vector of ones.
    scale_x_ones = self.bijector.scale.matvec(
        array_ops.ones(self._mode_mean_shape(), self.dtype))

    if self.loc is None:
      return scale_x_ones

    return array_ops.identity(self.loc) + scale_x_ones

  def _covariance(self):
    # Let
    #   W = (w1, ..., wk), with wj ~ iid Exponential(rate=1).
    # Then this distribution is
    #   X = loc + L W,
    # and since Cov(wi, wj) = 1 if i=j and 0 otherwise (i.e. Cov(W) = I),
    #   Cov(X) = L Cov(W) L^T = L L^T.
    if distribution_util.is_diagonal_scale(self.scale):
      return array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
    else:
      return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)

  def _variance(self):
    if distribution_util.is_diagonal_scale(self.scale):
      return math_ops.square(self.scale.diag_part())
    elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and
          self.scale.is_self_adjoint):
      return array_ops.matrix_diag_part(
          self.scale.matmul(self.scale.to_dense()))
    else:
      return array_ops.matrix_diag_part(
          self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))

  def _stddev(self):
    if distribution_util.is_diagonal_scale(self.scale):
      return math_ops.abs(self.scale.diag_part())
    elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and
          self.scale.is_self_adjoint):
      return math_ops.sqrt(
          array_ops.matrix_diag_part(self.scale.matmul(self.scale.to_dense())))
    else:
      return math_ops.sqrt(
          array_ops.matrix_diag_part(
              self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)))

  def _mode(self):
    # The mode of Exponential(rate=1) is 0, so the mode of X = loc + L W is loc.
    scale_x_zeros = self.bijector.scale.matvec(
        array_ops.zeros(self._mode_mean_shape(), self.dtype))

    if self.loc is None:
      return scale_x_zeros

    return array_ops.identity(self.loc) + scale_x_zeros

  def _mode_mean_shape(self):
    """Shape for the mode/mean Tensors."""
    shape = self.batch_shape.concatenate(self.event_shape)
    has_static_shape = shape.is_fully_defined()
    if not has_static_shape:
      shape = array_ops.concat([
          self.batch_shape_tensor(),
          self.event_shape_tensor(),
      ], 0)
    return shape
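

# A minimal, self-contained usage sketch, assuming a TF 1.x graph-mode
# `Session`; the `example_*` values below are arbitrary illustrative inputs and
# are not part of the library API. When this file is executed directly, it
# numerically checks the identities noted in `_mean` and `_mode`:
#   mean = loc + scale @ 1,   mode = loc  (since Exponential(rate=1) has mode 0).
if __name__ == "__main__":
  import numpy as np
  import tensorflow as tf

  example_loc = [1., -1., 0.5]
  example_scale_diag = [0.5, 1., 2.]
  vex = VectorExponentialLinearOperator(
      loc=example_loc,
      scale=tf.linalg.LinearOperatorDiag(example_scale_diag))

  with tf.Session() as sess:
    mean_val, mode_val = sess.run([vex.mean(), vex.mode()])

  # With a diagonal scale, mean = loc + scale_diag and mode = loc.
  np.testing.assert_allclose(
      mean_val, np.add(example_loc, example_scale_diag), atol=1e-6)
  np.testing.assert_allclose(mode_val, example_loc, atol=1e-6)
  print("mean:", mean_val, "mode:", mode_val)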