# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

# pylint: disable=unused-import,g-bad-import-order
"""Contains the core layers: Dense, Dropout, Flatten.

Also contains their functional aliases.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import six
from six.moves import xrange  # pylint: disable=redefined-builtin
import numpy as np

from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import standard_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export('layers.Dense')
class Dense(base.Layer):
  """Densely-connected layer class.

  This layer implements the operation:
  `outputs = activation(inputs * kernel + bias)`
  where `activation` is the activation function passed as the `activation`
  argument (if not `None`), `kernel` is a weights matrix created by the layer,
  and `bias` is a bias vector created by the layer
  (only if `use_bias` is `True`).
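
  Example usage (a minimal sketch; the placeholder shape and unit count
  are illustrative):

  ```
  x = tf.placeholder(shape=(None, 32), dtype='float32')
  y = Dense(units=16, activation=tf.nn.relu)(x)
  # now `y` has shape `(None, 16)`
  ```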

  Arguments:
    units: Integer or Long, dimensionality of the output space.
    activation: Activation function (callable). Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using the default
      initializer used by `tf.get_variable`.
    bias_initializer: Initializer function for the bias.
    kernel_regularizer: Regularizer function for the weight matrix.
    bias_regularizer: Regularizer function for the bias.
    activity_regularizer: Regularizer function for the output.
    kernel_constraint: An optional projection function to be applied to the
      kernel after being updated by an `Optimizer` (e.g. used to implement
      norm constraints or value constraints for layer weights). The function
      must take as input the unprojected variable and must return the
      projected variable (which must have the same shape). Constraints are
      not safe to use when doing asynchronous distributed training.
    bias_constraint: An optional projection function to be applied to the
      bias after being updated by an `Optimizer`.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require `reuse=True` in such
      cases.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Properties:
    units: Python integer, dimensionality of the output space.
    activation: Activation function (callable).
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer instance (or name) for the kernel matrix.
    bias_initializer: Initializer instance (or name) for the bias.
    kernel_regularizer: Regularizer instance for the kernel matrix (callable).
    bias_regularizer: Regularizer instance for the bias (callable).
    activity_regularizer: Regularizer instance for the output (callable).
    kernel_constraint: Constraint function for the kernel matrix.
    bias_constraint: Constraint function for the bias.
    kernel: Weight matrix (TensorFlow variable or tensor).
    bias: Bias vector, if applicable (TensorFlow variable or tensor).
  """

  def __init__(self, units,
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=init_ops.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               **kwargs):
    super(Dense, self).__init__(trainable=trainable, name=name,
                                activity_regularizer=activity_regularizer,
                                **kwargs)
    self.units = units
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.kernel_regularizer = kernel_regularizer
    self.bias_regularizer = bias_regularizer
    self.kernel_constraint = kernel_constraint
    self.bias_constraint = bias_constraint
    self.input_spec = base.InputSpec(min_ndim=2)

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape[-1].value is None:
      raise ValueError('The last dimension of the inputs to `Dense` '
                       'should be defined. Found `None`.')
    self.input_spec = base.InputSpec(min_ndim=2,
                                     axes={-1: input_shape[-1].value})
    self.kernel = self.add_variable('kernel',
                                    shape=[input_shape[-1].value, self.units],
                                    initializer=self.kernel_initializer,
                                    regularizer=self.kernel_regularizer,
                                    constraint=self.kernel_constraint,
                                    dtype=self.dtype,
                                    trainable=True)
    if self.use_bias:
      self.bias = self.add_variable('bias',
                                    shape=[self.units],
                                    initializer=self.bias_initializer,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint,
                                    dtype=self.dtype,
                                    trainable=True)
    else:
      self.bias = None
    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    if len(shape) > 2:
      # Broadcasting is required for the inputs.
      outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
                                                             [0]])
      # Reshape the output back to the original ndim of the input.
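      # (`tensordot` does not always infer the full static output shape, so
      # when building a graph we re-attach the statically known shape below;
      # eager tensors already carry concrete shapes.)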
      if context.in_graph_mode():
        output_shape = shape[:-1] + [self.units]
        outputs.set_shape(output_shape)
    else:
      outputs = standard_ops.matmul(inputs, self.kernel)
    if self.use_bias:
      outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
      return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank_at_least(2)
    if input_shape[-1].value is None:
      raise ValueError(
          'The innermost dimension of input_shape must be defined, but saw: %s'
          % input_shape)
    return input_shape[:-1].concatenate(self.units)


@tf_export('layers.dense')
def dense(
    inputs, units,
    activation=None,
    use_bias=True,
    kernel_initializer=None,
    bias_initializer=init_ops.zeros_initializer(),
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    bias_constraint=None,
    trainable=True,
    name=None,
    reuse=None):
  """Functional interface for the densely-connected layer.

  This layer implements the operation:
  `outputs = activation(inputs * kernel + bias)`
  where `activation` is the activation function passed as the `activation`
  argument (if not `None`), `kernel` is a weights matrix created by the layer,
  and `bias` is a bias vector created by the layer
  (only if `use_bias` is `True`).

  Arguments:
    inputs: Tensor input.
    units: Integer or Long, dimensionality of the output space.
    activation: Activation function (callable). Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using the default
      initializer used by `tf.get_variable`.
    bias_initializer: Initializer function for the bias.
    kernel_regularizer: Regularizer function for the weight matrix.
    bias_regularizer: Regularizer function for the bias.
    activity_regularizer: Regularizer function for the output.
    kernel_constraint: An optional projection function to be applied to the
      kernel after being updated by an `Optimizer` (e.g. used to implement
      norm constraints or value constraints for layer weights). The function
      must take as input the unprojected variable and must return the
      projected variable (which must have the same shape). Constraints are
      not safe to use when doing asynchronous distributed training.
    bias_constraint: An optional projection function to be applied to the
      bias after being updated by an `Optimizer`.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Returns:
    Output tensor with the same shape as `inputs` except the last dimension
    is of size `units`.

  Raises:
    ValueError: if eager execution is enabled.
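
  Example usage (a minimal sketch; the placeholder shape and unit count
  are illustrative):

  ```
  x = tf.placeholder(shape=(None, 32), dtype='float32')
  y = tf.layers.dense(x, units=16, activation=tf.nn.relu)
  # now `y` has shape `(None, 16)`
  ```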
  """
  layer = Dense(units,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                kernel_constraint=kernel_constraint,
                bias_constraint=bias_constraint,
                trainable=trainable,
                name=name,
                dtype=inputs.dtype.base_dtype,
                _scope=name,
                _reuse=reuse)
  return layer.apply(inputs)


@tf_export('layers.Dropout')
class Dropout(base.Layer):
  """Applies Dropout to the input.

  Dropout consists of randomly setting a fraction `rate` of input units to 0
  at each update during training time, which helps prevent overfitting.
  The units that are kept are scaled by `1 / (1 - rate)`, so that their
  sum is unchanged at training time and inference time.

  Arguments:
    rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out
      10% of input units.
    noise_shape: 1D tensor of type `int32` representing the shape of the
      binary dropout mask that will be multiplied with the input.
      For instance, if your inputs have shape
      `(batch_size, timesteps, features)`, and you want the dropout mask
      to be the same for all timesteps, you can use
      `noise_shape=[batch_size, 1, features]`.
    seed: A Python integer. Used to create random seeds. See
      @{tf.set_random_seed} for behavior.
    name: The name of the layer (string).
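
  Example usage (an illustrative sketch; the shapes and the boolean
  `training` placeholder are arbitrary):

  ```
  x = tf.placeholder(shape=(None, 16), dtype='float32')
  training = tf.placeholder(shape=(), dtype='bool')
  y = Dropout(rate=0.4)(x, training=training)
  # dropout is applied only when `training` evaluates to True
  ```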
  """

  def __init__(self, rate=0.5,
               noise_shape=None,
               seed=None,
               name=None,
               **kwargs):
    super(Dropout, self).__init__(name=name, **kwargs)
    self.rate = rate
    self.noise_shape = noise_shape
    self.seed = seed

  def _get_noise_shape(self, inputs):
    # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`,
    # which will override `self.noise_shape`, and allows for custom noise
    # shapes with dynamically sized inputs.
    if self.noise_shape is None:
      return self.noise_shape
    return nn_ops._get_noise_shape(inputs, self.noise_shape)

  def call(self, inputs, training=False):

    def dropped_inputs():
      return nn.dropout(inputs, 1 - self.rate,
                        noise_shape=self._get_noise_shape(inputs),
                        seed=self.seed)
    return utils.smart_cond(training,
                            dropped_inputs,
                            lambda: array_ops.identity(inputs))

  def compute_output_shape(self, input_shape):
    return input_shape


@tf_export('layers.dropout')
def dropout(inputs,
            rate=0.5,
            noise_shape=None,
            seed=None,
            training=False,
            name=None):
  """Applies Dropout to the input.

  Dropout consists of randomly setting a fraction `rate` of input units to 0
  at each update during training time, which helps prevent overfitting.
  The units that are kept are scaled by `1 / (1 - rate)`, so that their
  sum is unchanged at training time and inference time.

  Arguments:
    inputs: Tensor input.
    rate: The dropout rate, between 0 and 1. E.g. `rate=0.1` would drop out
      10% of input units.
    noise_shape: 1D tensor of type `int32` representing the shape of the
      binary dropout mask that will be multiplied with the input.
      For instance, if your inputs have shape
      `(batch_size, timesteps, features)`, and you want the dropout mask
      to be the same for all timesteps, you can use
      `noise_shape=[batch_size, 1, features]`.
    seed: A Python integer. Used to create random seeds. See
      @{tf.set_random_seed} for behavior.
    training: Either a Python boolean, or a TensorFlow boolean scalar tensor
      (e.g. a placeholder). Whether to return the output in training mode
      (apply dropout) or in inference mode (return the input untouched).
    name: The name of the layer (string).

  Returns:
    Output tensor.

  Raises:
    ValueError: if eager execution is enabled.
  """
  layer = Dropout(rate, noise_shape=noise_shape, seed=seed, name=name)
  return layer.apply(inputs, training=training)


@tf_export('layers.Flatten')
class Flatten(base.Layer):
  """Flattens an input tensor while preserving the batch axis (axis 0).

  Examples:

  ```
  x = tf.placeholder(shape=(None, 4, 4), dtype='float32')
  y = Flatten()(x)
  # now `y` has shape `(None, 16)`

  x = tf.placeholder(shape=(None, 3, None), dtype='float32')
  y = Flatten()(x)
  # now `y` has shape `(None, None)`
  ```
  """

  def __init__(self, **kwargs):
    super(Flatten, self).__init__(**kwargs)
    self.input_spec = base.InputSpec(min_ndim=2)

  def call(self, inputs):
    outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1))
    if context.in_graph_mode():
      outputs.set_shape(self.compute_output_shape(inputs.get_shape()))
    return outputs

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape).as_list()
    output_shape = [input_shape[0]]
    if all(input_shape[1:]):
      output_shape += [np.prod(input_shape[1:])]
    else:
      output_shape += [None]
    return tensor_shape.TensorShape(output_shape)


@tf_export('layers.flatten')
def flatten(inputs, name=None):
  """Flattens an input tensor while preserving the batch axis (axis 0).

  Arguments:
    inputs: Tensor input.
    name: The name of the layer (string).

  Returns:
    Reshaped tensor.

  Examples:

  ```
  x = tf.placeholder(shape=(None, 4, 4), dtype='float32')
  y = flatten(x)
  # now `y` has shape `(None, 16)`

  x = tf.placeholder(shape=(None, 3, None), dtype='float32')
  y = flatten(x)
  # now `y` has shape `(None, None)`
  ```
  """
  layer = Flatten(name=name)
  return layer.apply(inputs)


# Aliases

FullyConnected = Dense
fully_connected = dense