18d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.  All Rights Reserved
28d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi#
38d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# Permission is hereby granted, free of charge, to any person obtaining a
48d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# copy of this software and associated documentation files (the
58d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# "Software"), to deal in the Software without restriction, including
68d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# without limitation the rights to use, copy, modify, merge, publish, dis-
78d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# tribute, sublicense, and/or sell copies of the Software, and to permit
88d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# persons to whom the Software is furnished to do so, subject to the fol-
98d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# lowing conditions:
108d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi#
118d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# The above copyright notice and this permission notice shall be included
128d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# in all copies or substantial portions of the Software.
138d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi#
148d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
158d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
168d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
178d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
188d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
198d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi# IN THE SOFTWARE.
218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi#
228d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport logging
238d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport os
248d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport tempfile
258d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoiimport time
268d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom hashlib import sha256
278d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom tests.unit import unittest
288d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
298d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom boto.compat import BytesIO, six, StringIO
308d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoifrom boto.glacier.utils import minimum_part_size, chunk_hashes, tree_hash, \
318d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi        bytes_to_hex, compute_hashes_from_fileobj
328d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
338d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
class TestPartSizeCalculations(unittest.TestCase):
    """Checks the part size returned by ``minimum_part_size``."""

    def test_small_values_still_use_default_part_size(self):
        # Even a one-byte size is expected to yield the 4 MiB part size.
        four_mib = 4 * 1024 * 1024
        self.assertEqual(minimum_part_size(1), four_mib)

    def test_under_the_maximum_value(self):
        # While we stay under the maximum, 4MB parts are sufficient.
        mib = 1024 * 1024
        part_size = minimum_part_size(8 * mib)
        self.assertEqual(part_size, 4 * mib)

    def test_gigabyte_size(self):
        # Once the default part size is no longer enough, the part size is
        # expected to grow to the next power of two that keeps the upload
        # under 10,000 parts.
        eight_mib = 8 * 1024 * 1024
        part_size = minimum_part_size(eight_mib * 10000)
        self.assertEqual(part_size, eight_mib)

    def test_terabyte_size(self):
        # A 4 TB file needs a part size of at least 512 MB.
        mib = 1024 * 1024
        four_tib = 4 * mib * mib
        self.assertEqual(minimum_part_size(four_tib), 512 * mib)

    def test_file_size_too_large(self):
        # One byte more than 40000 GiB is expected to be rejected.
        oversized = (40000 * 1024 * 1024 * 1024) + 1
        self.assertRaises(ValueError, minimum_part_size, oversized)

    def test_default_part_size_can_be_specified(self):
        # The caller-supplied default (2 MiB here) is returned as-is when
        # it is already large enough for the given size.
        mib = 1024 * 1024
        default_part_size = 2 * mib
        part_size = minimum_part_size(8 * mib, default_part_size)
        self.assertEqual(part_size, default_part_size)
638d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
648d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
class TestChunking(unittest.TestCase):
    """Tests for ``chunk_hashes``.

    The expectations below rely on ``chunk_hashes`` splitting its input
    into 1 MiB pieces when no chunk size is passed, and returning one
    SHA-256 digest per piece.
    """

    def test_chunk_hashes_exact(self):
        # The input is an exact multiple of the chunk size: two full
        # chunks and no trailing partial chunk.
        one_meg_digest = sha256(b'a' * 1024 * 1024).digest()
        chunks = chunk_hashes(b'a' * (2 * 1024 * 1024))
        self.assertEqual(len(chunks), 2)
        self.assertEqual(chunks[0], one_meg_digest)
        # Verify the last chunk as well, so that a mistake at the
        # trailing boundary cannot go unnoticed.
        self.assertEqual(chunks[1], one_meg_digest)

    def test_chunks_with_leftovers(self):
        # Two full chunks followed by a 20-byte remainder that must be
        # hashed as its own (short) chunk.
        one_meg_digest = sha256(b'a' * 1024 * 1024).digest()
        bytestring = b'a' * (2 * 1024 * 1024 + 20)
        chunks = chunk_hashes(bytestring)
        self.assertEqual(len(chunks), 3)
        self.assertEqual(chunks[0], one_meg_digest)
        self.assertEqual(chunks[1], one_meg_digest)
        self.assertEqual(chunks[2], sha256(b'a' * 20).digest())

    def test_less_than_one_chunk(self):
        # Input shorter than one chunk yields a single digest of the
        # whole input.
        chunks = chunk_hashes(b'aaaa')
        self.assertEqual(len(chunks), 1)
        self.assertEqual(chunks[0], sha256(b'aaaa').digest())
838d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
848d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
class TestTreeHash(unittest.TestCase):
    """Regression tests for the tree hash calculation.

    For these tests, a set of reference tree hashes was computed ahead of
    time. Comparing against them will at least catch any regressions in
    the tree hash calculations.
    """

    def calculate_tree_hash(self, bytestring):
        """Return the hex-encoded tree hash of ``bytestring``.

        The input is run through ``chunk_hashes``, ``tree_hash`` and
        ``bytes_to_hex`` in that order. The elapsed wall-clock time is
        logged at DEBUG level together with the input length.

        :param bytestring: The data to hash.
        :return: Whatever ``bytes_to_hex`` returns for the tree hash; the
            tests in this class compare it against bytes literals.
        """
        start = time.time()
        calculated = bytes_to_hex(tree_hash(chunk_hashes(bytestring)))
        end = time.time()
        logging.debug("Tree hash calc time for length %s: %s",
                      len(bytestring), end - start)
        return calculated

    def test_tree_hash_calculations(self):
        one_meg_bytestring = b'a' * (1 * 1024 * 1024)
        two_meg_bytestring = b'a' * (2 * 1024 * 1024)
        four_meg_bytestring = b'a' * (4 * 1024 * 1024)
        # Four full megabytes plus a short 20-byte tail.
        bigger_bytestring = four_meg_bytestring + b'a' * 20

        self.assertEqual(
            self.calculate_tree_hash(one_meg_bytestring),
            b'9bc1b2a288b26af7257a36277ae3816a7d4f16e89c1e7e77d0a5c48bad62b360')
        self.assertEqual(
            self.calculate_tree_hash(two_meg_bytestring),
            b'560c2c9333c719cb00cfdffee3ba293db17f58743cdd1f7e4055373ae6300afa')
        self.assertEqual(
            self.calculate_tree_hash(four_meg_bytestring),
            b'9491cb2ed1d4e7cd53215f4017c23ec4ad21d7050a1e6bb636c4f67e8cddb844')
        self.assertEqual(
            self.calculate_tree_hash(bigger_bytestring),
            b'12f3cbd6101b981cde074039f6f728071da8879d6f632de8afc7cdf00661b08f')

    def test_empty_tree_hash(self):
        # Pass an empty *bytes* object, like every other call in this
        # class, rather than an empty text string: hashing operates on
        # bytes, so the empty case should exercise the same input type.
        # The expected value is the SHA-256 digest of empty input.
        self.assertEqual(
            self.calculate_tree_hash(b''),
            b'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')
1208d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
1218d2b206a675ec20ea07100c35df34e65ee1e45e8Ruchi Kandoi
class TestFileHash(unittest.TestCase):
    """Smoke tests for ``compute_hashes_from_fileobj``."""

    def _gen_data(self):
        # Build a pseudo-random payload. The fixed two-byte tail is
        # included as an example of data that fails to decode via UTF-8.
        undecodable_tail = b'\xc2\x00'
        return os.urandom(5000) + undecodable_tail

    def test_compute_hash_tempfile(self):
        # Hash the contents of a real temporary file. Python 2 gets a
        # non-binary mode here, whereas Python 3 must use binary mode for
        # binary data -- otherwise UTF-8 codec errors show up.
        mode = "w+" if six.PY2 else "wb+"

        with tempfile.TemporaryFile(mode=mode) as fileobj:
            fileobj.write(self._gen_data())
            fileobj.seek(0)

            compute_hashes_from_fileobj(fileobj, chunk_size=512)

    @unittest.skipUnless(six.PY3, 'Python 3 requires reading binary!')
    def test_compute_hash_tempfile_py3(self):
        # The mode deliberately lacks the 'b' flag, which is expected to
        # be rejected with a ValueError.
        with tempfile.TemporaryFile(mode='w+') as text_file:
            self.assertRaises(ValueError, compute_hashes_from_fileobj,
                              text_file, chunk_size=512)

        # File-like objects without a mode are a separate case: if the
        # object has an encoding it is used, otherwise the data is
        # encoded as UTF-8 to get bytes for hashing.
        text_stream = StringIO('test data' * 500)
        compute_hashes_from_fileobj(text_stream, chunk_size=512)

    @unittest.skipUnless(six.PY2, 'Python 3 requires reading binary!')
    def test_compute_hash_stringio(self):
        # On Python 2 a StringIO may carry binary data.
        binary_stream = StringIO(self._gen_data())
        compute_hashes_from_fileobj(binary_stream, chunk_size=512)

    def test_compute_hash_bytesio(self):
        # Hash the contents of an in-memory, file-like BytesIO object.
        byte_stream = BytesIO(self._gen_data())
        compute_hashes_from_fileobj(byte_stream, chunk_size=512)
166