#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
from copy import deepcopy
from cStringIO import StringIO
from functools import partial
from hashlib import sha1
from random import random
import unittest
from zipfile import ZipFile

from caching_file_system import CachingFileSystem
from file_system import FileNotFoundError, StatInfo
from fake_url_fetcher import FakeURLFSFetcher, MockURLFetcher
from local_file_system import LocalFileSystem
from new_github_file_system import GithubFileSystem
from object_store_creator import ObjectStoreCreator
from test_file_system import TestFileSystem


class _TestBundle(object):
  '''Bundles test file data with a GithubFileSystem and test utilities. Create
  GithubFileSystems and the fetchers they use via |CreateGfsAndFetcher()|,
  randomly mutate the zip contents via |Mutate()|, and access the underlying
  zip data via |files|.
  '''

  def __init__(self):
    self.files = {
      'zipfile/': '',
      'zipfile/hello.txt': 'world',
      'zipfile/readme': 'test zip',
      'zipfile/dir/file1': 'contents',
      'zipfile/dir/file2': 'more contents'
    }
    self._test_files = {
      'test_owner': {
        'changing-repo': {
          'commits': {
            'HEAD': self._MakeShaJson(self._GenerateHash())
          },
          'zipball': self._ZipFromFiles(self.files)
        }
      }
    }

    self._fake_fetcher = None

  def CreateGfsAndFetcher(self):
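    '''Creates a GithubFileSystem for the 'changing-repo' test data, backed by
    a MockURLFetcher wrapping a FakeURLFSFetcher over |_test_files|. Returns
    both the file system and its fetcher so tests can check fetch counts.
    '''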
    fetchers = []
    def create_mock_url_fetcher(base_path):
      assert not fetchers
      # Save this reference so we can replace the TestFileSystem in Mutate.
      self._fake_fetcher = FakeURLFSFetcher(
          TestFileSystem(self._test_files), base_path)
      fetchers.append(MockURLFetcher(self._fake_fetcher))
      return fetchers[-1]

    # Constructing |gfs| will create a fetcher.
    gfs = GithubFileSystem.ForTest(
        'changing-repo/', create_mock_url_fetcher, path='')
    assert len(fetchers) == 1
    return gfs, fetchers[0]

  def Mutate(self):
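    '''Changes the contents of several zipped files to fresh random data and
    points HEAD at a new fake SHA, then swaps the mutated data into the
    FakeURLFSFetcher. Returns (new version hash, new file contents).
    '''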
    fake_version = self._GenerateHash()
    fake_data = self._GenerateHash()
    self.files['zipfile/hello.txt'] = fake_data
    self.files['zipfile/new-file'] = fake_data
    self.files['zipfile/dir/file1'] = fake_data
    self._test_files['test_owner']['changing-repo']['zipball'] = (
        self._ZipFromFiles(self.files))
    self._test_files['test_owner']['changing-repo']['commits']['HEAD'] = (
        self._MakeShaJson(fake_version))

    # Update the file_system used by FakeURLFSFetcher so the above mutations
    # propagate.
    self._fake_fetcher.UpdateFS(TestFileSystem(self._test_files))

    return fake_version, fake_data

  def _GenerateHash(self):
    '''Generates an arbitrary SHA1 hash.
    '''
    return sha1(str(random())).hexdigest()

  def _MakeShaJson(self, hash_value):
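    '''Returns the canned HEAD commit JSON from the test data with its 'sha'
    field replaced by |hash_value|.
    '''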
    commit_json = json.loads(deepcopy(LocalFileSystem('').ReadSingle(
        'test_data/github_file_system/test_owner/repo/commits/HEAD').Get()))
    commit_json['sha'] = hash_value
    return json.dumps(commit_json)

  def _ZipFromFiles(self, file_dict):
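    '''Builds an in-memory zip archive from a dict of file name to contents
    and returns the raw zip data as a string.
    '''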
    string = StringIO()
    zipfile = ZipFile(string, 'w')
    for filename, contents in file_dict.iteritems():
      zipfile.writestr(filename, contents)
    zipfile.close()
    return string.getvalue()


class TestGithubFileSystem(unittest.TestCase):
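  '''Tests GithubFileSystem reads, stats, and refreshes against canned
  repository data served through a FakeURLFSFetcher over the local file
  system, so no real network fetches are made.
  '''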
  def setUp(self):
    self._gfs = GithubFileSystem.ForTest(
        'repo/', partial(FakeURLFSFetcher, LocalFileSystem('')))
    # Start and finish the repository load.
    self._cgfs = CachingFileSystem(self._gfs, ObjectStoreCreator.ForTest())

  def testReadDirectory(self):
    self._gfs.Refresh().Get()
    self.assertEqual(
        sorted(['requirements.txt', '.gitignore', 'README.md', 'src/']),
        sorted(self._gfs.ReadSingle('').Get()))
    self.assertEqual(
        sorted(['__init__.notpy', 'hello.notpy']),
        sorted(self._gfs.ReadSingle('src/').Get()))

  def testReadFile(self):
    self._gfs.Refresh().Get()
    expected = (
      '# Compiled Python files\n'
      '*.pyc\n'
    )
    self.assertEqual(expected, self._gfs.ReadSingle('.gitignore').Get())

  def testMultipleReads(self):
    self._gfs.Refresh().Get()
    self.assertEqual(
        self._gfs.ReadSingle('requirements.txt').Get(),
        self._gfs.ReadSingle('requirements.txt').Get())

  def testReads(self):
    self._gfs.Refresh().Get()
    expected = {
        'src/': sorted(['hello.notpy', '__init__.notpy']),
        '': sorted(['requirements.txt', '.gitignore', 'README.md', 'src/'])
    }

    read = self._gfs.Read(['', 'src/']).Get()
    self.assertEqual(expected['src/'], sorted(read['src/']))
    self.assertEqual(expected[''], sorted(read['']))

  def testStat(self):
    # This is the hash value from the zip on disk.
    real_hash = 'c36fc23688a9ec9e264d3182905dc0151bfff7d7'

    self._gfs.Refresh().Get()
    dir_stat = StatInfo(real_hash, {
      'hello.notpy': StatInfo(real_hash),
      '__init__.notpy': StatInfo(real_hash)
    })

    self.assertEqual(StatInfo(real_hash), self._gfs.Stat('README.md'))
    self.assertEqual(StatInfo(real_hash), self._gfs.Stat('src/hello.notpy'))
    self.assertEqual(dir_stat, self._gfs.Stat('src/'))

  def testBadReads(self):
    self._gfs.Refresh().Get()
    self.assertRaises(FileNotFoundError, self._gfs.Stat, 'DONT_README.md')
    self.assertRaises(FileNotFoundError,
                      self._gfs.ReadSingle('DONT_README.md').Get)

  def testCachingFileSystem(self):
    self._cgfs.Refresh().Get()
    initial_cgfs_read_one = self._cgfs.ReadSingle('src/hello.notpy').Get()

    self.assertEqual(initial_cgfs_read_one,
                     self._gfs.ReadSingle('src/hello.notpy').Get())
    self.assertEqual(initial_cgfs_read_one,
                     self._cgfs.ReadSingle('src/hello.notpy').Get())

    initial_cgfs_read_two = self._cgfs.Read(
        ['README.md', 'requirements.txt']).Get()

    self.assertEqual(
        initial_cgfs_read_two,
        self._gfs.Read(['README.md', 'requirements.txt']).Get())
    self.assertEqual(
        initial_cgfs_read_two,
        self._cgfs.Read(['README.md', 'requirements.txt']).Get())

  def testWithoutRefresh(self):
    # Without refreshing, it will still read the content from blobstore, and
    # it does this via the magic of the FakeURLFSFetcher.
    self.assertEqual(['__init__.notpy', 'hello.notpy'],
                     sorted(self._gfs.ReadSingle('src/').Get()))

  def testRefresh(self):
    test_bundle = _TestBundle()
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()

    # It shouldn't fetch until Refresh does so; then it will do two fetches,
    # one for the stat and another for the read.
    self.assertTrue(*fetcher.CheckAndReset())
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1,
                                           fetch_async_count=1,
                                           fetch_resolve_count=1))

    # Refresh is just an alias for Read('').
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset())

    initial_dir_read = sorted(gfs.ReadSingle('').Get())
    initial_file_read = gfs.ReadSingle('dir/file1').Get()

    version, data = test_bundle.Mutate()

    # Check that the changes have not affected the file system yet.
    self.assertEqual(initial_dir_read, sorted(gfs.ReadSingle('').Get()))
    self.assertEqual(initial_file_read, gfs.ReadSingle('dir/file1').Get())
    self.assertNotEqual(StatInfo(version), gfs.Stat(''))

    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    gfs.Refresh().Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1,
                                           fetch_async_count=1,
                                           fetch_resolve_count=1))

    # Check that the changes have affected the file system.
    self.assertEqual(data, gfs.ReadSingle('new-file').Get())
    self.assertEqual(test_bundle.files['zipfile/dir/file1'],
                     gfs.ReadSingle('dir/file1').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('new-file'))

    # Regression test: ensure that reading the data after it's been mutated,
    # but before Refresh() has been realised, still returns the correct data.
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    version, data = test_bundle.Mutate()

    refresh_future = gfs.Refresh()
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1, fetch_async_count=1))

    self.assertEqual(data, gfs.ReadSingle('new-file').Get())
    self.assertEqual(test_bundle.files['zipfile/dir/file1'],
                     gfs.ReadSingle('dir/file1').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('new-file'))

    refresh_future.Get()
    self.assertTrue(*fetcher.CheckAndReset(fetch_resolve_count=1))

  def testGetThenRefreshOnStartup(self):
    # Regression test: Test that calling Get() but never resolving the future,
    # then Refresh()ing the data, causes the data to be refreshed.
    test_bundle = _TestBundle()
    gfs, fetcher = test_bundle.CreateGfsAndFetcher()
    self.assertTrue(*fetcher.CheckAndReset())

    # Get a predictable version.
    version, data = test_bundle.Mutate()

    read_future = gfs.ReadSingle('hello.txt')
    # Fetch for the Stat(), async-fetch for the Read().
    self.assertTrue(*fetcher.CheckAndReset(fetch_count=1, fetch_async_count=1))

    refresh_future = gfs.Refresh()
    self.assertTrue(*fetcher.CheckAndReset())

    self.assertEqual(data, read_future.Get())
    self.assertTrue(*fetcher.CheckAndReset(fetch_resolve_count=1))
    self.assertEqual(StatInfo(version), gfs.Stat('hello.txt'))
    self.assertTrue(*fetcher.CheckAndReset())

    # The fetch will already have been resolved, so resolving the Refresh won't
    # affect anything.
    refresh_future.Get()
    self.assertTrue(*fetcher.CheckAndReset())

    # Read data should not have changed.
    self.assertEqual(data, gfs.ReadSingle('hello.txt').Get())
    self.assertEqual(StatInfo(version), gfs.Stat('hello.txt'))
    self.assertTrue(*fetcher.CheckAndReset())


if __name__ == '__main__':
  unittest.main()