# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
"""Unit tests for the helper functions in ``boto.glacier.utils``."""
import logging
import os
import tempfile
import time
from hashlib import sha256
from tests.unit import unittest

from boto.compat import BytesIO, six, StringIO
from boto.glacier.utils import minimum_part_size, chunk_hashes, tree_hash, \
    bytes_to_hex, compute_hashes_from_fileobj


class TestPartSizeCalculations(unittest.TestCase):
    """Checks on the part size chosen by ``minimum_part_size``."""

    def test_small_values_still_use_default_part_size(self):
        """A one-byte archive still gets the 4 MB part size."""
        self.assertEqual(minimum_part_size(1), 4 * 1024 * 1024)

    def test_under_the_maximum_value(self):
        """An 8 MB archive gets the 4 MB part size."""
        # If we're under the maximum, we can use 4MB part sizes.
        self.assertEqual(minimum_part_size(8 * 1024 * 1024),
                         4 * 1024 * 1024)

    def test_gigabyte_size(self):
        """An archive of 10,000 * 8 MB gets an 8 MB part size."""
        # If we're over the maximum default part size, we go up to the next
        # power of two until we find a part size that keeps us under 10,000
        # parts.
        self.assertEqual(minimum_part_size(8 * 1024 * 1024 * 10000),
                         8 * 1024 * 1024)

    def test_terabyte_size(self):
        """A 4 TB archive gets a 512 MB part size."""
        # For a 4 TB file we need at least a 512 MB part size.
        self.assertEqual(minimum_part_size(4 * 1024 * 1024 * 1024 * 1024),
                         512 * 1024 * 1024)

    def test_file_size_too_large(self):
        """One byte over 40,000 GB raises ``ValueError``."""
        with self.assertRaises(ValueError):
            minimum_part_size((40000 * 1024 * 1024 * 1024) + 1)

    def test_default_part_size_can_be_specified(self):
        """A caller-supplied default part size is returned when it fits."""
        default_part_size = 2 * 1024 * 1024
        self.assertEqual(minimum_part_size(8 * 1024 * 1024, default_part_size),
                         default_part_size)


class TestChunking(unittest.TestCase):
    """Checks that ``chunk_hashes`` yields one SHA-256 digest per chunk."""

    def test_chunk_hashes_exact(self):
        """Exactly 2 MB of input produces two chunk digests."""
        chunks = chunk_hashes(b'a' * (2 * 1024 * 1024))
        self.assertEqual(len(chunks), 2)
        self.assertEqual(chunks[0], sha256(b'a' * 1024 * 1024).digest())

    def test_chunks_with_leftovers(self):
        """2 MB plus 20 bytes produces two full chunks and a 20-byte one."""
        bytestring = b'a' * (2 * 1024 * 1024 + 20)
        chunks = chunk_hashes(bytestring)
        self.assertEqual(len(chunks), 3)
        self.assertEqual(chunks[0], sha256(b'a' * 1024 * 1024).digest())
        self.assertEqual(chunks[1], sha256(b'a' * 1024 * 1024).digest())
        self.assertEqual(chunks[2], sha256(b'a' * 20).digest())

    def test_less_than_one_chunk(self):
        """Four bytes of input produces a single digest of those bytes."""
        chunks = chunk_hashes(b'aaaa')
        self.assertEqual(len(chunks), 1)
        self.assertEqual(chunks[0], sha256(b'aaaa').digest())


class TestTreeHash(unittest.TestCase):
    """Regression checks for the tree hash against reference values."""

    # For these tests, a set of reference tree hashes were computed.
    # This will at least catch any regressions to the tree hash
    # calculations.
    def calculate_tree_hash(self, bytestring):
        """Return the hex tree hash of ``bytestring``.

        The elapsed time of the calculation is logged at debug level.
        """
        start = time.time()
        calculated = bytes_to_hex(tree_hash(chunk_hashes(bytestring)))
        end = time.time()
        logging.debug("Tree hash calc time for length %s: %s",
                      len(bytestring), end - start)
        return calculated

    def test_tree_hash_calculations(self):
        """Tree hashes for 1 MB, 2 MB, 4 MB and 4 MB + 20 bytes of ``a``."""
        one_meg_bytestring = b'a' * (1 * 1024 * 1024)
        two_meg_bytestring = b'a' * (2 * 1024 * 1024)
        four_meg_bytestring = b'a' * (4 * 1024 * 1024)
        bigger_bytestring = four_meg_bytestring + b'a' * 20

        self.assertEqual(
            self.calculate_tree_hash(one_meg_bytestring),
            b'9bc1b2a288b26af7257a36277ae3816a7d4f16e89c1e7e77d0a5c48bad62b360')
        self.assertEqual(
            self.calculate_tree_hash(two_meg_bytestring),
            b'560c2c9333c719cb00cfdffee3ba293db17f58743cdd1f7e4055373ae6300afa')
        self.assertEqual(
            self.calculate_tree_hash(four_meg_bytestring),
            b'9491cb2ed1d4e7cd53215f4017c23ec4ad21d7050a1e6bb636c4f67e8cddb844')
        self.assertEqual(
            self.calculate_tree_hash(bigger_bytestring),
            b'12f3cbd6101b981cde074039f6f728071da8879d6f632de8afc7cdf00661b08f')

    def test_empty_tree_hash(self):
        """Tree hash of empty input matches the reference value."""
        # Pass bytes, like every other hashing test in this module, so the
        # input type is the same on Python 2 and Python 3.
        self.assertEqual(
            self.calculate_tree_hash(b''),
            b'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')


class TestFileHash(unittest.TestCase):
    """Smoke tests for ``compute_hashes_from_fileobj``.

    Apart from the text-mode case, which expects ``ValueError``, these
    tests only check that the call completes; they make no assertion
    about the returned hashes.
    """

    def _gen_data(self):
        """Return 5000 random bytes followed by a non-UTF-8 suffix."""
        # Generate some pseudo-random bytes of data. We include the
        # hard-coded blob as an example that fails to decode via UTF-8.
        return os.urandom(5000) + b'\xc2\x00'

    def test_compute_hash_tempfile(self):
        """Hashing a temporary file holding binary data does not raise."""
        # Compute a hash from a file object. On Python 2 this uses a non-
        # binary mode. On Python 3, however, binary mode is required for
        # binary files. If not used, you will get UTF-8 code errors.
        if six.PY2:
            mode = "w+"
        else:
            mode = "wb+"

        with tempfile.TemporaryFile(mode=mode) as f:
            f.write(self._gen_data())
            f.seek(0)

            compute_hashes_from_fileobj(f, chunk_size=512)

    @unittest.skipUnless(six.PY3, 'Python 3 requires reading binary!')
    def test_compute_hash_tempfile_py3(self):
        """On Python 3 a text-mode file raises; a mode-less StringIO works."""
        # Note the missing 'b' in the mode!
        with tempfile.TemporaryFile(mode='w+') as f:
            with self.assertRaises(ValueError):
                compute_hashes_from_fileobj(f, chunk_size=512)

        # What about file-like objects without a mode? If it has an
        # encoding we use it, otherwise attempt UTF-8 encoding to
        # bytes for hashing.
        f = StringIO('test data' * 500)
        compute_hashes_from_fileobj(f, chunk_size=512)

    @unittest.skipUnless(six.PY2, 'Python 3 requires reading binary!')
    def test_compute_hash_stringio(self):
        """On Python 2, binary data held in a StringIO hashes without error."""
        # Python 2 binary data in StringIO example
        f = StringIO(self._gen_data())
        compute_hashes_from_fileobj(f, chunk_size=512)

    def test_compute_hash_bytesio(self):
        """Binary data held in a BytesIO hashes without error."""
        # Compute a hash from a file-like BytesIO object.
        f = BytesIO(self._gen_data())
        compute_hashes_from_fileobj(f, chunk_size=512)