1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Vectorized Exponential distribution class, directly using LinearOperator."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.contrib.distributions.python.ops import bijectors
22from tensorflow.contrib.distributions.python.ops import distribution_util
23from tensorflow.python.framework import ops
24from tensorflow.python.ops import array_ops
25from tensorflow.python.ops import math_ops
26from tensorflow.python.ops.distributions import exponential
27from tensorflow.python.ops.distributions import transformed_distribution
28from tensorflow.python.ops.linalg import linalg
29
30__all__ = ["VectorExponentialLinearOperator"]
31
32_mvn_sample_note = """
33`value` is a batch vector with compatible shape if `value` is a `Tensor` whose
34shape can be broadcast up to either:
35
36```python
37self.batch_shape + self.event_shape
38```
39
40or
41
42```python
43[M1, ..., Mm] + self.batch_shape + self.event_shape
44```
45
46"""
47
48
49class VectorExponentialLinearOperator(
50    transformed_distribution.TransformedDistribution):
51  """The vectorization of the Exponential distribution on `R^k`.
52
53  The vector exponential distribution is defined over a subset of `R^k`, and
54  parameterized by a (batch of) length-`k` `loc` vector and a (batch of) `k x k`
55  `scale` matrix:  `covariance = scale @ scale.T`, where `@` denotes
56  matrix-multiplication.
57
58  #### Mathematical Details
59
60  The probability density function (pdf) is
61
62  ```none
63  pdf(y; loc, scale) = exp(-||x||_1) / Z,  for y in S(loc, scale),
64  x = inv(scale) @ (y - loc),
65  Z = |det(scale)|,
66  ```
67
68  where:
69
70  * `loc` is a vector in `R^k`,
71  * `scale` is a linear operator in `R^{k x k}`, `cov = scale @ scale.T`,
72  * `S = {loc + scale @ x : x in R^k, x_1 > 0, ..., x_k > 0}`, is an image of
73     the positive half-space,
74  * `||x||_1` denotes the `l1` norm of `x`, `sum_i |x_i|`,
75  * `Z` denotes the normalization constant.
76
77  The VectorExponential distribution is a member of the [location-scale
78  family](https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
79  constructed as,
80
81  ```none
82  X = (X_1, ..., X_k), each X_i ~ Exponential(rate=1)
83  Y = (Y_1, ...,Y_k) = scale @ X + loc
84  ```
85
86  #### About `VectorExponential` and `Vector` distributions in TensorFlow.
87
88  The `VectorExponential` is a non-standard distribution that has useful
89  properties.
90
91  The marginals `Y_1, ..., Y_k` are *not* Exponential random variables, due to
92  the fact that the sum of Exponential random variables is not Exponential.
93
94  Instead, `Y` is a vector whose components are linear combinations of
95  Exponential random variables.  Thus, `Y` lives in the vector space generated
96  by `vectors` of Exponential distributions.  This allows the user to decide the
97  mean and covariance (by setting `loc` and `scale`), while preserving some
98  properties of the Exponential distribution.  In particular, the tails of `Y_i`
99  will be (up to polynomial factors) exponentially decaying.
100
101  To see this last statement, note that the pdf of `Y_i` is the convolution of
102  the pdf of `k` independent Exponential random variables.  One can then show by
103  induction that distributions with exponential (up to polynomial factors) tails
104  are closed under convolution.
105
106
107  #### Examples
108
109  ```python
110  tfd = tf.contrib.distributions
111
112  # Initialize a single 2-variate VectorExponential, supported on
113  # {(x, y) in R^2 : x > 0, y > 0}.
114  mat = [[1.0, 0.1],
115         [0.1, 1.0]]
116
117  vex = tfd.VectorExponentialLinearOperator(
118      scale=tf.linalg.LinearOperatorFullMatrix(mat))
119
120  # Compute the pdf of an`R^2` observation; return a scalar.
121  vex.prob([1., 2.]).eval()  # shape: []
122
123  # Initialize a 2-batch of 3-variate Vector Exponential's.
124  mu = [[1., 2, 3],
125        [1., 0, 0]]              # shape: [2, 3]
126  scale_diag = [[1., 2, 3],
127                [0.5, 1, 1.5]]     # shape: [2, 3]
128
129  vex = tfd.VectorExponentialLinearOperator(
130      loc=mu,
131      scale=tf.linalg.LinearOperatorDiag(scale_diag))
132
133  # Compute the pdf of two `R^3` observations; return a length-2 vector.
134  x = [[1.9, 2.2, 3.1],
135       [10., 1.0, 9.0]]     # shape: [2, 3]
136  vex.prob(x).eval()    # shape: [2]
137  ```
138
139  """
140
141  def __init__(self,
142               loc=None,
143               scale=None,
144               validate_args=False,
145               allow_nan_stats=True,
146               name="VectorExponentialLinearOperator"):
147    """Construct Vector Exponential distribution supported on a subset of `R^k`.
148
149    The `batch_shape` is the broadcast shape between `loc` and `scale`
150    arguments.
151
152    The `event_shape` is given by last dimension of the matrix implied by
153    `scale`. The last dimension of `loc` (if provided) must broadcast with this.
154
155    Recall that `covariance = scale @ scale.T`.
156
157    Additional leading dimensions (if any) will index batches.
158
159    Args:
160      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
161        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
162        `b >= 0` and `k` is the event size.
163      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
164        `[B1, ..., Bb, k, k]`.
165      validate_args: Python `bool`, default `False`. Whether to validate input
166        with asserts. If `validate_args` is `False`, and the inputs are
167        invalid, correct behavior is not guaranteed.
168      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
169        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
170        batch member If `True`, batch members with valid parameters leading to
171        undefined statistics will return NaN for this statistic.
172      name: The name to give Ops created by the initializer.
173
174    Raises:
175      ValueError: if `scale` is unspecified.
176      TypeError: if not `scale.dtype.is_floating`
177    """
178    parameters = locals()
179    if scale is None:
180      raise ValueError("Missing required `scale` parameter.")
181    if not scale.dtype.is_floating:
182      raise TypeError("`scale` parameter must have floating-point dtype.")
183
184    with ops.name_scope(name, values=[loc] + scale.graph_parents):
185      # Since expand_dims doesn't preserve constant-ness, we obtain the
186      # non-dynamic value if possible.
187      loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc
188      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
189          loc, scale)
190
191      super(VectorExponentialLinearOperator, self).__init__(
192          distribution=exponential.Exponential(rate=array_ops.ones(
193              [], dtype=scale.dtype), allow_nan_stats=allow_nan_stats),
194          bijector=bijectors.AffineLinearOperator(
195              shift=loc, scale=scale, validate_args=validate_args),
196          batch_shape=batch_shape,
197          event_shape=event_shape,
198          validate_args=validate_args,
199          name=name)
200      self._parameters = parameters
201
202  @property
203  def loc(self):
204    """The `loc` `Tensor` in `Y = scale @ X + loc`."""
205    return self.bijector.shift
206
207  @property
208  def scale(self):
209    """The `scale` `LinearOperator` in `Y = scale @ X + loc`."""
210    return self.bijector.scale
211
212  @distribution_util.AppendDocstring(_mvn_sample_note)
213  def _log_prob(self, x):
214    return super(VectorExponentialLinearOperator, self)._log_prob(x)
215
216  @distribution_util.AppendDocstring(_mvn_sample_note)
217  def _prob(self, x):
218    return super(VectorExponentialLinearOperator, self)._prob(x)
219
220  def _mean(self):
221    # Let
222    #   W = (w1,...,wk), with wj ~ iid Exponential(0, 1).
223    # Then this distribution is
224    #   X = loc + LW,
225    # and then E[X] = loc + L1, where 1 is the vector of ones.
226    scale_x_ones = self.bijector.scale.matvec(
227        array_ops.ones(self._mode_mean_shape(), self.dtype))
228
229    if self.loc is None:
230      return scale_x_ones
231
232    return array_ops.identity(self.loc) + scale_x_ones
233
234  def _covariance(self):
235    # Let
236    #   W = (w1,...,wk), with wj ~ iid Exponential(0, 1).
237    # Then this distribution is
238    #   X = loc + LW,
239    # and then since Cov(wi, wj) = 1 if i=j, and 0 otherwise,
240    #   Cov(X) = L Cov(W W^T) L^T = L L^T.
241    if distribution_util.is_diagonal_scale(self.scale):
242      return array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
243    else:
244      return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
245
246  def _variance(self):
247    if distribution_util.is_diagonal_scale(self.scale):
248      return math_ops.square(self.scale.diag_part())
249    elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and
250          self.scale.is_self_adjoint):
251      return array_ops.matrix_diag_part(
252          self.scale.matmul(self.scale.to_dense()))
253    else:
254      return array_ops.matrix_diag_part(
255          self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))
256
257  def _stddev(self):
258    if distribution_util.is_diagonal_scale(self.scale):
259      return math_ops.abs(self.scale.diag_part())
260    elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and
261          self.scale.is_self_adjoint):
262      return math_ops.sqrt(
263          array_ops.matrix_diag_part(self.scale.matmul(self.scale.to_dense())))
264    else:
265      return math_ops.sqrt(
266          array_ops.matrix_diag_part(
267              self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)))
268
269  def _mode(self):
270    scale_x_zeros = self.bijector.scale.matvec(
271        array_ops.zeros(self._mode_mean_shape(), self.dtype))
272
273    if self.loc is None:
274      return scale_x_zeros
275
276    return array_ops.identity(self.loc) + scale_x_zeros
277
278  def _mode_mean_shape(self):
279    """Shape for the mode/mean Tensors."""
280    shape = self.batch_shape.concatenate(self.event_shape)
281    has_static_shape = shape.is_fully_defined()
282    if not has_static_shape:
283      shape = array_ops.concat([
284          self.batch_shape_tensor(),
285          self.event_shape_tensor(),
286      ], 0)
287    return shape
288