1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for io_utils."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import os
22import shutil
23
24import numpy as np
25
26from tensorflow.python.keras._impl import keras
27from tensorflow.python.platform import test
28
29try:
30  import h5py  # pylint:disable=g-import-not-at-top
31except ImportError:
32  h5py = None
33
34
def create_dataset(h5_path='test.h5'):
  """Writes a small random binary-classification dataset to an HDF5 file.

  The file holds two datasets: 'my_data', a (200, 10) float32 feature matrix
  drawn from a standard normal distribution, and 'my_labels', a (200, 1)
  integer matrix of random 0/1 labels.

  Args:
    h5_path: Path of the HDF5 file to create. The file is opened in 'w' mode.
  """
  x = np.random.randn(200, 10).astype('float32')
  y = np.random.randint(0, 2, size=(200, 1))
  # The context manager closes the file even if creating or filling a dataset
  # raises, so no handle is leaked on failure.
  with h5py.File(h5_path, 'w') as f:
    # Creating dataset to store features
    x_dset = f.create_dataset('my_data', (200, 10), dtype='f')
    x_dset[:] = x
    # Creating dataset to store labels
    y_dset = f.create_dataset('my_labels', (200, 1), dtype='i')
    y_dset[:] = y
46
47
class TestIOUtils(test.TestCase):
  """Tests for `keras.utils.io_utils`."""

  def test_HDF5Matrix(self):
    """Checks HDF5Matrix attributes and its use in fit/predict/evaluate."""
    if h5py is None:
      # Report the test as skipped rather than silently passing, so a missing
      # optional dependency is visible in the test results.
      self.skipTest('h5py is not available.')

    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)

    h5_path = os.path.join(temp_dir, 'test.h5')
    create_dataset(h5_path)

    # Instantiating HDF5Matrix for the training set,
    # which is a slice of the first 150 elements
    x_train = keras.utils.io_utils.HDF5Matrix(
        h5_path, 'my_data', start=0, end=150)
    y_train = keras.utils.io_utils.HDF5Matrix(
        h5_path, 'my_labels', start=0, end=150)

    # Likewise for the test set
    x_test = keras.utils.io_utils.HDF5Matrix(
        h5_path, 'my_data', start=150, end=200)
    y_test = keras.utils.io_utils.HDF5Matrix(
        h5_path, 'my_labels', start=150, end=200)

    # HDF5Matrix behave more or less like Numpy matrices
    # with regard to indexing
    self.assertEqual(y_train.shape, (150, 1))
    # But they do not support negative indices, so don't try print(x_train[-1])

    self.assertEqual(y_train.dtype, np.dtype('i'))
    self.assertEqual(y_train.ndim, 2)
    self.assertEqual(y_train.size, 150)

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='sgd')

    # Note: you have to use shuffle='batch' or False with HDF5Matrix
    model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False)
    # Test that evaluation and prediction
    # don't crash and return reasonable results
    out_pred = model.predict(x_test, batch_size=32, verbose=False)
    out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False)

    self.assertEqual(out_pred.shape, (50, 1))
    self.assertEqual(out_eval.shape, ())
    self.assertGreater(out_eval, 0)
97
98
# Run the tests via the TensorFlow test entry point when executed as a script.
if __name__ == '__main__':
  test.main()
101