# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Vector Student's t distribution classes."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.distributions.python.ops import bijectors
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions import student_t
from tensorflow.python.ops.distributions import transformed_distribution


class _VectorStudentT(transformed_distribution.TransformedDistribution):
  """A vector version of Student's t-distribution on `R^k`.

  #### Mathematical details

  The probability density function (pdf) is,

  ```none
  pdf(x; df, mu, Sigma) = prod_{i=1}^k (1 + y[i]**2 / df)**(-0.5 (df + 1)) / Z
  where,
  y = inv(Sigma) (x - mu)
  Z = abs(det(Sigma)) ( sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1)) )**k
  ```

  where:
  * `loc = mu`; a vector in `R^k`,
  * `scale = Sigma`; a lower-triangular matrix in `R^{k x k}`,
  * `Z` denotes the normalization constant,
  * `Gamma` is the [gamma function](
    https://en.wikipedia.org/wiki/Gamma_function), and,
  * `y[i]` denotes the `i`-th component of `y`.

  The VectorStudentT distribution is a member of the [location-scale family](
  https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
  constructed as,

  ```none
  X ~ StudentT(df, loc=0, scale=1)
  Y = loc + scale * X
  ```

  Notice that the `scale` matrix has semantics closer to std. deviation than
  covariance (but it is not std. deviation).

  This distribution is an Affine transformation of iid
  [Student's t-distributions](
  https://en.wikipedia.org/wiki/Student%27s_t-distribution)
  and should not be confused with the [Multivariate Student's t-distribution](
  https://en.wikipedia.org/wiki/Multivariate_t-distribution). The
  traditional Multivariate Student's t-distribution is a type of
  [elliptical distribution](
  https://en.wikipedia.org/wiki/Elliptical_distribution); it has PDF:

  ```none
  pdf(x; df, mu, Sigma) = (1 + ||y||**2 / df)**(-0.5 (df + k)) / Z
  where,
  y = inv(Sigma) (x - mu)
  Z = abs(det(Sigma)) sqrt(df pi)**k Gamma(0.5 df) / Gamma(0.5 (df + k))
  ```

  where `||y||**2` denotes the [squared Euclidean norm](
  https://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm) of `y`.

  Notice that the Multivariate Student's t-distribution uses `k` where the
  Vector Student's t-distribution has a `1`. Conversely the Vector version has a
  broader application of the power-`k` in the normalization constant, and its
  density factorizes over the components of `y` instead of depending on `y`
  only through `||y||**2`.

  #### Examples

  A single instance of a "Vector Student's t-distribution" is defined by a mean
  vector of length `k` and a scale matrix of shape `k x k`.

  Extra leading dimensions, if provided, allow for batches.

  ```python
  tfd = tf.contrib.distributions

  # Initialize a single 3-variate vector Student's t-distribution.
  mu = [1., 2, 3]
  chol = [[1., 0, 0.],
          [1, 3, 0],
          [1, 2, 3]]
  vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)

  # Evaluate this on an observation in R^3, returning a scalar.
  vt.prob([-1., 0, 1])

  # Initialize a batch of two 3-variate vector Student's t-distributions.
  mu = [[1., 2, 3],
        [11, 22, 33]]
  chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
  vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)

  # Evaluate this on two observations, each in R^3, returning a length two
  # tensor.
  x = [[-1., 0, 1],
       [-11, 0, 11]]
  vt.prob(x)
  ```

  For more examples of how to construct the `scale` matrix, see the
  `tf.contrib.distributions.bijectors.Affine` docstring.

  """

  def __init__(self,
               df,
               loc=None,
               scale_identity_multiplier=None,
               scale_diag=None,
               scale_tril=None,
               scale_perturb_factor=None,
               scale_perturb_diag=None,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorStudentT"):
    """Instantiates the vector Student's t-distributions on `R^k`.

    The `batch_shape` is the broadcast between `df.batch_shape` and
    `Affine.batch_shape` where `Affine` is constructed from `loc` and
    `scale_*` arguments.

    The `event_shape` is the event shape of `Affine.event_shape`.

    Args:
      df: Floating-point `Tensor`. The degrees of freedom of the
        distribution(s). `df` must contain only positive values. Must be
        scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the
        same `batch_shape` implied by `loc`, `scale_*`.
      loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
        applied.
      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
        scaling done to the identity matrix. When `scale_identity_multiplier =
        scale_diag = scale_tril = None` then `scale += IdentityMatrix`.
        Otherwise no scaled-identity-matrix is added to `scale`.
      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
        `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
        diagonal matrix. When `None` no diagonal term is added to `scale`.
      scale_tril: Floating-point `Tensor` representing the lower triangular
        matrix. `scale_tril` has shape [N1, N2, ..., k, k], which represents a
        k x k lower triangular matrix. When `None` no `scale_tril` term is
        added to `scale`. The upper triangular elements above the diagonal are
        ignored.
      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
        update is added to `scale`.
      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
        matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
        represents an r x r Diagonal matrix. When `None` low rank updates will
        take the form `scale_perturb_factor * scale_perturb_factor.T`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    # Snapshot the constructor arguments before any other local is bound.
    # `locals()` may hand back the frame's live mapping, which later
    # `locals()` calls or trace hooks refresh with `affine`, `distribution`,
    # etc.; copying keeps `self._parameters` limited to what was in scope here.
    parameters = dict(locals())
    graph_parents = [df, loc, scale_identity_multiplier, scale_diag,
                     scale_tril, scale_perturb_factor, scale_perturb_diag]
    with ops.name_scope(name):
      with ops.name_scope("init", values=graph_parents):
        # The shape of the _VectorStudentT distribution is governed by the
        # relationship between df.batch_shape and affine.batch_shape. In
        # pseudocode the basic procedure is:
        #   if df.batch_shape is scalar:
        #     if affine.batch_shape is not scalar:
        #       # broadcast distribution.sample so
        #       # it has affine.batch_shape.
        #     self.batch_shape = affine.batch_shape
        #   else:
        #     if affine.batch_shape is scalar:
        #       # let affine broadcasting do its thing.
        #     self.batch_shape = df.batch_shape
        # All of the above magic is actually handled by TransformedDistribution.
        # Here we really only need to collect the affine.batch_shape and decide
        # what we're going to pass in to TransformedDistribution's
        # (override) batch_shape arg.
        affine = bijectors.Affine(
            shift=loc,
            scale_identity_multiplier=scale_identity_multiplier,
            scale_diag=scale_diag,
            scale_tril=scale_tril,
            scale_perturb_factor=scale_perturb_factor,
            scale_perturb_diag=scale_perturb_diag,
            validate_args=validate_args)
        # Base distribution: a standard (loc=0, scale=1) Student's t whose
        # dtype follows the bijector; all location/scale handling is delegated
        # to `affine`.
        distribution = student_t.StudentT(
            df=df,
            loc=array_ops.zeros([], dtype=affine.dtype),
            scale=array_ops.ones([], dtype=affine.dtype))
        batch_shape, override_event_shape = (
            distribution_util.shapes_from_loc_and_scale(
                affine.shift, affine.scale))
        # Only override the batch shape when `df` is scalar-batch; otherwise
        # pass an empty shape vector so no override is requested.
        override_batch_shape = distribution_util.pick_vector(
            distribution.is_scalar_batch(),
            batch_shape,
            constant_op.constant([], dtype=dtypes.int32))
        super(_VectorStudentT, self).__init__(
            distribution=distribution,
            bijector=affine,
            batch_shape=override_batch_shape,
            event_shape=override_event_shape,
            validate_args=validate_args,
            name=name)
        self._parameters = parameters

  @property
  def df(self):
    """Degrees of freedom in these Student's t distribution(s)."""
    return self.distribution.df

  @property
  def loc(self):
    """Locations of these Student's t distribution(s)."""
    return self.bijector.shift

  @property
  def scale(self):
    """The `scale` of the underlying `Affine` bijector (not a covariance)."""
    return self.bijector.scale