new_github_file_system_test.py revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
from copy import deepcopy
from cStringIO import StringIO
from functools import partial
from hashlib import sha1
from random import random
import unittest
from zipfile import ZipFile

from caching_file_system import CachingFileSystem
from file_system import FileNotFoundError, StatInfo
from fake_url_fetcher import FakeURLFSFetcher, MockURLFetcher
from local_file_system import LocalFileSystem
from new_github_file_system import GithubFileSystem
from object_store_creator import ObjectStoreCreator
from test_file_system import TestFileSystem


class _TestBundle(object):
  '''Bundles test file data with a GithubFileSystem and test utilities.

  Create GithubFileSystems (paired with the MockURLFetcher each one uses)
  via |CreateGfsAndFetcher()|, randomly mutate the fake repository's
  contents via |Mutate()|, and access the underlying zip data via |files|.
  '''

  def __init__(self):
    # Contents of the fake repository's zipball, keyed by archive path.
    # GitHub zipballs contain a single top-level directory (here 'zipfile/')
    # which holds the repository files.
    self.files = {
      'zipfile/': '',
      'zipfile/hello.txt': 'world',
      'zipfile/readme': 'test zip',
      'zipfile/dir/file1': 'contents',
      'zipfile/dir/file2': 'more contents'
    }
    # Fake GitHub API layout served to the GithubFileSystem:
    # owner -> repo -> {'commits': {'HEAD': <sha json>}, 'zipball': <bytes>}.
    self._test_files = {
      'test_owner': {
        'changing-repo': {
          'commits': {
            'HEAD': self._MakeShaJson(self._GenerateHash())
          },
          'zipball': self._ZipFromFiles(self.files)
        }
      }
    }

    # The FakeURLFSFetcher most recently created by CreateGfsAndFetcher();
    # Mutate() swaps out its backing file system so mutations propagate.
    self._fake_fetcher = None


  def CreateGfsAndFetcher(self):
    '''Returns a (GithubFileSystem, MockURLFetcher) pair, where the fetcher
    is the one the file system was constructed with (so tests can assert on
    fetch counts via CheckAndReset).
    '''
    fetchers = []
    def create_mock_url_fetcher(base_path):
      # GithubFileSystem.ForTest should only ever create one fetcher.
      assert not fetchers
      # Save this reference so we can replace the TestFileSystem in Mutate.
      self._fake_fetcher = FakeURLFSFetcher(
          TestFileSystem(self._test_files), base_path)
      fetchers.append(MockURLFetcher(self._fake_fetcher))
      return fetchers[-1]

    # Constructing |gfs| will create a fetcher.
    gfs = GithubFileSystem.ForTest(
        'changing-repo/', create_mock_url_fetcher, path='')
    assert len(fetchers) == 1
    return gfs, fetchers[0]

  def Mutate(self):
    '''Changes two existing files, adds a new file, and moves HEAD to a new
    random SHA, then pushes the new state into the fake fetcher.

    Returns a (new version sha, new file contents) tuple.
    '''
    fake_version = self._GenerateHash()
    fake_data = self._GenerateHash()
    self.files['zipfile/hello.txt'] = fake_data
    self.files['zipfile/new-file'] = fake_data
    self.files['zipfile/dir/file1'] = fake_data
    self._test_files['test_owner']['changing-repo']['zipball'] = (
        self._ZipFromFiles(self.files))
    self._test_files['test_owner']['changing-repo']['commits']['HEAD'] = (
        self._MakeShaJson(fake_version))

    # Update the file_system used by FakeURLFSFetcher so the above mutations
    # propagate.
    self._fake_fetcher.UpdateFS(TestFileSystem(self._test_files))

    return fake_version, fake_data

  def _GenerateHash(self):
    '''Generates an arbitrary SHA1 hash.
    '''
    return sha1(str(random())).hexdigest()

  def _MakeShaJson(self, hash_value):
    '''Returns commit JSON (in the shape GitHub's commits API serves, taken
    from the checked-in test data) with its 'sha' field set to |hash_value|.
    '''
    commit_json = json.loads(deepcopy(LocalFileSystem('').ReadSingle(
        'test_data/github_file_system/test_owner/repo/commits/HEAD').Get()))
    commit_json['sha'] = hash_value
    return json.dumps(commit_json)

  def _ZipFromFiles(self, file_dict):
    '''Returns the bytes of a zip archive whose entries are |file_dict|'s
    (archive path -> contents) items.
    '''
    string = StringIO()
    zipfile = ZipFile(string, 'w')
    for filename, contents in file_dict.iteritems():
      zipfile.writestr(filename, contents)
    zipfile.close()
    return string.getvalue()


class TestGithubFileSystem(unittest.TestCase):
  def setUp(self):
    # A GithubFileSystem backed by the on-disk test data via FakeURLFSFetcher.
    self._gfs = GithubFileSystem.ForTest(
        'repo/', partial(FakeURLFSFetcher, LocalFileSystem('')))
    # Start and finish the repository load.
    self._cgfs = CachingFileSystem(self._gfs, ObjectStoreCreator.ForTest())

  def testReadDirectory(self):
    # Directory reads list files, with subdirectories given a trailing '/'.
    self._gfs.Refresh().Get()
    self.assertEqual(
        sorted(['requirements.txt', '.gitignore', 'README.md', 'src/']),
        sorted(self._gfs.ReadSingle('').Get()))
    self.assertEqual(
        sorted(['__init__.notpy', 'hello.notpy']),
        sorted(self._gfs.ReadSingle('src/').Get()))

  def testReadFile(self):
    # File reads return the file's full contents.
    self._gfs.Refresh().Get()
    expected = (
      '# Compiled Python files\n'
      '*.pyc\n'
    )
    self.assertEqual(expected, self._gfs.ReadSingle('.gitignore').Get())

  def testMultipleReads(self):
    # Reading the same file twice returns the same data.
    self._gfs.Refresh().Get()
    self.assertEqual(
        self._gfs.ReadSingle('requirements.txt').Get(),
        self._gfs.ReadSingle('requirements.txt').Get())

  def testReads(self):
    # A batched Read() of several paths returns a dict keyed by path.
    self._gfs.Refresh().Get()
    expected = {
      'src/': sorted(['hello.notpy', '__init__.notpy']),
      '': sorted(['requirements.txt', '.gitignore', 'README.md', 'src/'])
    }

    read = self._gfs.Read(['', 'src/']).Get()
    self.assertEqual(expected['src/'], sorted(read['src/']))
    self.assertEqual(expected[''], sorted(read['']))

  def testStat(self):
    # This is the hash value from the zip on disk.
    real_hash = 'c36fc23688a9ec9e264d3182905dc0151bfff7d7'

    self._gfs.Refresh().Get()
    # Every file shares the repository-wide version; directory stats also
    # carry per-child StatInfos.
    dir_stat = StatInfo(real_hash, {
      'hello.notpy': StatInfo(real_hash),
      '__init__.notpy': StatInfo(real_hash)
    })

    self.assertEqual(StatInfo(real_hash), self._gfs.Stat('README.md'))
    self.assertEqual(StatInfo(real_hash), self._gfs.Stat('src/hello.notpy'))
    self.assertEqual(dir_stat, self._gfs.Stat('src/'))

  def testBadReads(self):
    # Stat() raises immediately for missing files; ReadSingle() raises when
    # its future is resolved.
    self._gfs.Refresh().Get()
    self.assertRaises(FileNotFoundError, self._gfs.Stat, 'DONT_README.md')
    self.assertRaises(FileNotFoundError,
                      self._gfs.ReadSingle('DONT_README.md').Get)

  def testCachingFileSystem(self):
    # A CachingFileSystem wrapping the GithubFileSystem returns the same
    # data as the underlying file system, for single and batched reads.
    self._cgfs.Refresh().Get()
    initial_cgfs_read_one = self._cgfs.ReadSingle('src/hello.notpy').Get()

    self.assertEqual(initial_cgfs_read_one,
                     self._gfs.ReadSingle('src/hello.notpy').Get())
    self.assertEqual(initial_cgfs_read_one,
                     self._cgfs.ReadSingle('src/hello.notpy').Get())

    initial_cgfs_read_two = self._cgfs.Read(
        ['README.md', 'requirements.txt']).Get()

    self.assertEqual(
        initial_cgfs_read_two,
        self._gfs.Read(['README.md', 'requirements.txt']).Get())
    self.assertEqual(
        initial_cgfs_read_two,
        self._cgfs.Read(['README.md', 'requirements.txt']).Get())

  def testWithoutRefresh(self):
    # Without refreshing it will still read the content from blobstore, and it
    # does this via the magic of the FakeURLFSFetcher.
    self.assertEqual(['__init__.notpy', 'hello.notpy'],
                     sorted(self._gfs.ReadSingle('src/').Get()))

  def testRefresh(self):
    test_bundle = _TestBundle()
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()

    # It shouldn't fetch until Refresh does so; then it will do 2 fetches,
    # one for the stat, and another for the read.
    self.assertTrue(*fetcher.CheckAndReset())
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1,
                                           fetch_async_count=1,
                                           fetch_resolve_count=1))

    # Refresh is just an alias for Read('').
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset())

    initial_dir_read = sorted(gfs.ReadSingle('').Get())
    initial_file_read = gfs.ReadSingle('dir/file1').Get()

    version, data = test_bundle.Mutate()

    # Check that changes have not affected the file system yet.
    self.assertEqual(initial_dir_read, sorted(gfs.ReadSingle('').Get()))
    self.assertEqual(initial_file_read, gfs.ReadSingle('dir/file1').Get())
    self.assertNotEqual(StatInfo(version), gfs.Stat(''))

    # A fresh file system instance picks up the mutation on Refresh.
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1,
                                           fetch_async_count=1,
                                           fetch_resolve_count=1))

    # Check that the changes have affected the file system.
    self.assertEqual(data, gfs.ReadSingle('new-file').Get())
    self.assertEqual(test_bundle.files['zipfile/dir/file1'],
                     gfs.ReadSingle('dir/file1').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('new-file'))

    # Regression test: ensure that reading the data after it's been mutated,
    # but before Refresh() has been realised, still returns the correct data.
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    version, data = test_bundle.Mutate()

    refresh_future = gfs.Refresh()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1, fetch_async_count=1))

    self.assertEqual(data, gfs.ReadSingle('new-file').Get())
    self.assertEqual(test_bundle.files['zipfile/dir/file1'],
                     gfs.ReadSingle('dir/file1').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('new-file'))

    refresh_future.Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_resolve_count=1))

  def testGetThenRefreshOnStartup(self):
    # Regression test: Test that calling Get() but never resolving the future,
    # then Refresh()ing the data, causes the data to be refreshed.
    test_bundle = _TestBundle()
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    self.assertTrue(*fetcher.CheckAndReset())

    # Get a predictable version.
    version, data = test_bundle.Mutate()

    read_future = gfs.ReadSingle('hello.txt')
    # Fetch for the Stat(), async-fetch for the Read().
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1, fetch_async_count=1))

    # Refreshing while a read is outstanding should not issue new fetches.
    refresh_future = gfs.Refresh()
    self.assertTrue(*fetcher.CheckAndReset())

    self.assertEqual(data, read_future.Get())
    self.assertTrue(*fetcher.CheckAndReset(fetch_resolve_count=1))
    self.assertEqual(StatInfo(version), gfs.Stat('hello.txt'))
    self.assertTrue(*fetcher.CheckAndReset())

    # The fetch will already have been resolved, so resolving the Refresh won't
    # affect anything.
    refresh_future.Get()
    self.assertTrue(*fetcher.CheckAndReset())

    # Read data should not have changed.
    self.assertEqual(data, gfs.ReadSingle('hello.txt').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('hello.txt'))
    self.assertTrue(*fetcher.CheckAndReset())


if __name__ == '__main__':
  unittest.main()