# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

# pylint: disable=unused-import,g-bad-import-order
"""Contains the core layers: Dense, Dropout, Flatten.

Also contains their functional aliases.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import six
from six.moves import xrange  # pylint: disable=redefined-builtin
import numpy as np

from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import standard_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export('layers.Dense')
class Dense(base.Layer):
  """Densely-connected layer class.

  This layer implements the operation:
  `outputs = activation(inputs * kernel + bias)`
  Where `activation` is the activation function passed as the `activation`
  argument (if not `None`), `kernel` is a weights matrix created by the layer,
  and `bias` is a bias vector created by the layer
  (only if `use_bias` is `True`).

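  For example (a minimal sketch, assuming graph mode; `x` stands in for any
  2-D float input tensor):

  ```
    x = tf.placeholder(shape=(None, 32), dtype='float32')
    y = Dense(units=16, activation=tf.nn.relu)(x)
    # now `y` has shape `(None, 16)`
  ```
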
  Arguments:
    units: Integer or Long, dimensionality of the output space.
    activation: Activation function (callable). Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using the default
      initializer used by `tf.get_variable`.
    bias_initializer: Initializer function for the bias.
    kernel_regularizer: Regularizer function for the weight matrix.
    bias_regularizer: Regularizer function for the bias.
    activity_regularizer: Regularizer function for the output.
    kernel_constraint: An optional projection function to be applied to the
        kernel after being updated by an `Optimizer` (e.g. used to implement
        norm constraints or value constraints for layer weights). The function
        must take as input the unprojected variable and must return the
        projected variable (which must have the same shape). Constraints are
        not safe to use when doing asynchronous distributed training.
    bias_constraint: An optional projection function to be applied to the
        bias after being updated by an `Optimizer`.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require reuse=True in such cases.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Properties:
    units: Python integer, dimensionality of the output space.
    activation: Activation function (callable).
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer instance (or name) for the kernel matrix.
    bias_initializer: Initializer instance (or name) for the bias.
    kernel_regularizer: Regularizer instance for the kernel matrix (callable).
    bias_regularizer: Regularizer instance for the bias (callable).
    activity_regularizer: Regularizer instance for the output (callable).
    kernel_constraint: Constraint function for the kernel matrix.
    bias_constraint: Constraint function for the bias.
    kernel: Weight matrix (TensorFlow variable or tensor).
    bias: Bias vector, if applicable (TensorFlow variable or tensor).
  """

  def __init__(self, units,
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=init_ops.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               **kwargs):
    super(Dense, self).__init__(trainable=trainable, name=name,
                                activity_regularizer=activity_regularizer,
                                **kwargs)
    self.units = units
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.kernel_regularizer = kernel_regularizer
    self.bias_regularizer = bias_regularizer
    self.kernel_constraint = kernel_constraint
    self.bias_constraint = bias_constraint
    self.input_spec = base.InputSpec(min_ndim=2)

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape[-1].value is None:
      raise ValueError('The last dimension of the inputs to `Dense` '
                       'should be defined. Found `None`.')
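    # Record the now-known last dimension in the input spec so that later
    # calls are checked against the shape the kernel is built for.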
    self.input_spec = base.InputSpec(min_ndim=2,
                                     axes={-1: input_shape[-1].value})
    self.kernel = self.add_variable('kernel',
                                    shape=[input_shape[-1].value, self.units],
                                    initializer=self.kernel_initializer,
                                    regularizer=self.kernel_regularizer,
                                    constraint=self.kernel_constraint,
                                    dtype=self.dtype,
                                    trainable=True)
    if self.use_bias:
      self.bias = self.add_variable('bias',
                                    shape=[self.units,],
                                    initializer=self.bias_initializer,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint,
                                    dtype=self.dtype,
                                    trainable=True)
    else:
      self.bias = None
    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    if len(shape) > 2:
      # Broadcasting is required for the inputs.
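      # E.g. inputs of shape `(batch, time, in_dim)` are contracted with the
      # kernel along their last axis, yielding `(batch, time, units)`.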
      outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
                                                             [0]])
      # Reshape the output back to the original ndim of the input.
      if context.in_graph_mode():
        output_shape = shape[:-1] + [self.units]
        outputs.set_shape(output_shape)
    else:
      outputs = standard_ops.matmul(inputs, self.kernel)
    if self.use_bias:
      outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
      return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank_at_least(2)
    if input_shape[-1].value is None:
      raise ValueError(
          'The innermost dimension of input_shape must be defined, but saw: %s'
          % input_shape)
    return input_shape[:-1].concatenate(self.units)


@tf_export('layers.dense')
def dense(
    inputs, units,
    activation=None,
    use_bias=True,
    kernel_initializer=None,
    bias_initializer=init_ops.zeros_initializer(),
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    bias_constraint=None,
    trainable=True,
    name=None,
    reuse=None):
  """Functional interface for the densely-connected layer.

  This layer implements the operation:
  `outputs = activation(inputs * kernel + bias)`
  Where `activation` is the activation function passed as the `activation`
  argument (if not `None`), `kernel` is a weights matrix created by the layer,
  and `bias` is a bias vector created by the layer
  (only if `use_bias` is `True`).

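  For example (a minimal sketch, assuming graph mode; `x` stands in for any
  2-D float input tensor):

  ```
    x = tf.placeholder(shape=(None, 32), dtype='float32')
    y = dense(x, units=16, activation=tf.nn.relu)
    # now `y` has shape `(None, 16)`
  ```
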
  Arguments:
    inputs: Tensor input.
    units: Integer or Long, dimensionality of the output space.
    activation: Activation function (callable). Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using the default
      initializer used by `tf.get_variable`.
    bias_initializer: Initializer function for the bias.
    kernel_regularizer: Regularizer function for the weight matrix.
    bias_regularizer: Regularizer function for the bias.
    activity_regularizer: Regularizer function for the output.
    kernel_constraint: An optional projection function to be applied to the
        kernel after being updated by an `Optimizer` (e.g. used to implement
        norm constraints or value constraints for layer weights). The function
        must take as input the unprojected variable and must return the
        projected variable (which must have the same shape). Constraints are
        not safe to use when doing asynchronous distributed training.
    bias_constraint: An optional projection function to be applied to the
        bias after being updated by an `Optimizer`.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Returns:
    Output tensor with the same shape as `inputs` except the last dimension is
    of size `units`.

  Raises:
    ValueError: if eager execution is enabled.
  """
  layer = Dense(units,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                kernel_constraint=kernel_constraint,
                bias_constraint=bias_constraint,
                trainable=trainable,
                name=name,
                dtype=inputs.dtype.base_dtype,
                _scope=name,
                _reuse=reuse)
  return layer.apply(inputs)


@tf_export('layers.Dropout')
class Dropout(base.Layer):
  """Applies Dropout to the input.

  Dropout consists in randomly setting a fraction `rate` of input units to 0
  at each update during training time, which helps prevent overfitting.
  The units that are kept are scaled by `1 / (1 - rate)`, so that their
  sum is unchanged at training time and inference time.

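  For example, with `rate=0.5` each unit is zeroed with probability 0.5 and
  the kept units are scaled by `1 / (1 - 0.5) = 2`. A minimal sketch (`x`
  stands in for any float input tensor):

  ```
    x = tf.placeholder(shape=(None, 16), dtype='float32')
    y = Dropout(rate=0.5)(x, training=True)
  ```
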
  Arguments:
    rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out
      10% of input units.
    noise_shape: 1D tensor of type `int32` representing the shape of the
      binary dropout mask that will be multiplied with the input.
      For instance, if your inputs have shape
      `(batch_size, timesteps, features)`, and you want the dropout mask
      to be the same for all timesteps, you can use
      `noise_shape=[batch_size, 1, features]`.
    seed: A Python integer. Used to create random seeds. See
      @{tf.set_random_seed}
      for behavior.
    name: The name of the layer (string).
  """

  def __init__(self, rate=0.5,
               noise_shape=None,
               seed=None,
               name=None,
               **kwargs):
    super(Dropout, self).__init__(name=name, **kwargs)
    self.rate = rate
    self.noise_shape = noise_shape
    self.seed = seed

  def _get_noise_shape(self, inputs):
    # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`,
    # which will override `self.noise_shape`, and allows for custom noise
    # shapes with dynamically sized inputs.
    if self.noise_shape is None:
      return self.noise_shape
    return nn_ops._get_noise_shape(inputs, self.noise_shape)

  def call(self, inputs, training=False):

    def dropped_inputs():
      return nn.dropout(inputs, 1 - self.rate,
                        noise_shape=self._get_noise_shape(inputs),
                        seed=self.seed)
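    # `utils.smart_cond` picks a branch statically when `training` is a
    # Python boolean, and builds a conditional op when it is a tensor.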
    return utils.smart_cond(training,
                            dropped_inputs,
                            lambda: array_ops.identity(inputs))

  def compute_output_shape(self, input_shape):
    return input_shape


@tf_export('layers.dropout')
def dropout(inputs,
            rate=0.5,
            noise_shape=None,
            seed=None,
            training=False,
            name=None):
  """Applies Dropout to the input.

  Dropout consists in randomly setting a fraction `rate` of input units to 0
  at each update during training time, which helps prevent overfitting.
  The units that are kept are scaled by `1 / (1 - rate)`, so that their
  sum is unchanged at training time and inference time.

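  For example (a minimal sketch; `is_training` is an assumed boolean
  placeholder used to switch between training and inference):

  ```
    is_training = tf.placeholder(dtype=tf.bool, shape=())
    x = tf.placeholder(shape=(None, 16), dtype='float32')
    y = dropout(x, rate=0.2, training=is_training)
  ```
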
  Arguments:
    inputs: Tensor input.
    rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out
      10% of input units.
    noise_shape: 1D tensor of type `int32` representing the shape of the
      binary dropout mask that will be multiplied with the input.
      For instance, if your inputs have shape
      `(batch_size, timesteps, features)`, and you want the dropout mask
      to be the same for all timesteps, you can use
      `noise_shape=[batch_size, 1, features]`.
    seed: A Python integer. Used to create random seeds. See
      @{tf.set_random_seed}
      for behavior.
    training: Either a Python boolean, or a TensorFlow boolean scalar tensor
      (e.g. a placeholder). Whether to return the output in training mode
      (apply dropout) or in inference mode (return the input untouched).
    name: The name of the layer (string).

  Returns:
    Output tensor.

  Raises:
    ValueError: if eager execution is enabled.
  """
  layer = Dropout(rate, noise_shape=noise_shape, seed=seed, name=name)
  return layer.apply(inputs, training=training)


@tf_export('layers.Flatten')
class Flatten(base.Layer):
  """Flattens an input tensor while preserving the batch axis (axis 0).

  Examples:

  ```
    x = tf.placeholder(shape=(None, 4, 4), dtype='float32')
    y = Flatten()(x)
    # now `y` has shape `(None, 16)`

    x = tf.placeholder(shape=(None, 3, None), dtype='float32')
    y = Flatten()(x)
    # now `y` has shape `(None, None)`
  ```
  """

  def __init__(self, **kwargs):
    super(Flatten, self).__init__(**kwargs)
    self.input_spec = base.InputSpec(min_ndim=2)

  def call(self, inputs):
    outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1))
    if context.in_graph_mode():
      outputs.set_shape(self.compute_output_shape(inputs.get_shape()))
    return outputs

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape).as_list()
    output_shape = [input_shape[0]]
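    # The flattened size is the product of the non-batch dimensions when all
    # of them are statically known; otherwise it is unknown (`None`).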
    if all(input_shape[1:]):
      output_shape += [np.prod(input_shape[1:])]
    else:
      output_shape += [None]
    return tensor_shape.TensorShape(output_shape)


@tf_export('layers.flatten')
def flatten(inputs, name=None):
  """Flattens an input tensor while preserving the batch axis (axis 0).

  Arguments:
    inputs: Tensor input.
    name: The name of the layer (string).

  Returns:
    Reshaped tensor.

  Examples:

  ```
    x = tf.placeholder(shape=(None, 4, 4), dtype='float32')
    y = flatten(x)
    # now `y` has shape `(None, 16)`

    x = tf.placeholder(shape=(None, 3, None), dtype='float32')
    y = flatten(x)
    # now `y` has shape `(None, None)`
  ```
  """
  layer = Flatten(name=name)
  return layer.apply(inputs)


# Aliases

FullyConnected = Dense
fully_connected = dense