1b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# Copyright 2012 Google Inc. All Rights Reserved.
2b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik#
3b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# Licensed under the Apache License, Version 2.0 (the "License");
4b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# you may not use this file except in compliance with the License.
5b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# You may obtain a copy of the License at
6b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik#
7b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik#    http://www.apache.org/licenses/LICENSE-2.0
8b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik#
9b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# Unless required by applicable law or agreed to in writing,
10b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# software distributed under the License is distributed on an
11b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# either express or implied. See the License for the specific
13b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik# language governing permissions and limitations under the License.
14b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
15b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik"""Python wrappers for the Google Storage RESTful API."""
16b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
17b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
18b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
19b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
20b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
21b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik__all__ = ['ReadBuffer',
22b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik           'StreamingBuffer',
23b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          ]
24b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
25b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikimport collections
26b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikimport os
27b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikimport urlparse
28b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
29b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikfrom . import api_utils
30b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikfrom . import common
31b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikfrom . import errors
32b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikfrom . import rest_api
33b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
34b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craiktry:
35b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  from google.appengine.api import urlfetch
36b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  from google.appengine.ext import ndb
37b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikexcept ImportError:
38b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  from google.appengine.api import urlfetch
39b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  from google.appengine.ext import ndb
40b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
41b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
42b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def _get_storage_api(retry_params, account_id=None):
  """Build the _StorageApi instance used by the API methods.

  Args:
    retry_params: An api_utils.RetryParams instance. If None, the thread's
      default will be used.
    account_id: Internal-use only.

  Returns:
    A _StorageApi instance that handles the urlfetch work to GCS. When
    running locally without an access token, the instance is pointed at
    the local stub URL. Whenever an access token is configured, it is
    attached to the instance so requests go to the real GCS.
  """
  storage_api = _StorageApi(_StorageApi.full_control_scope,
                            service_account_id=account_id,
                            retry_params=retry_params)

  # Only fall back to the local stub when no token has been configured.
  use_local_stub = common.local_run() and not common.get_access_token()
  if use_local_stub:
    storage_api.api_url = common.local_api_url()

  access_token = common.get_access_token()
  if access_token:
    storage_api.token = access_token

  return storage_api
67b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
68b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
class _StorageApi(rest_api._RestApi):
  """A simple wrapper for the Google Storage RESTful API.

  WARNING: Do NOT directly use this api. It's an implementation detail
  and is subject to change at any release.

  All async methods have similar args and returns.

  Args:
    path: The path to the Google Storage object or bucket, e.g.
      '/mybucket/myfile' or '/mybucket'.
    **kwd: Options for urlfetch. e.g.
      headers={'content-type': 'text/plain'}, payload='blah'.

  Returns:
    A ndb Future. When fulfilled, future.get_result() should return
    a tuple of (status, headers, content) that represents a HTTP response
    of Google Cloud Storage XML API.
  """

  # Base URL that every request path is appended to.
  api_url = 'https://storage.googleapis.com'
  # OAuth scopes accepted by the storage service.
  read_only_scope = 'https://www.googleapis.com/auth/devstorage.read_only'
  read_write_scope = 'https://www.googleapis.com/auth/devstorage.read_write'
  full_control_scope = 'https://www.googleapis.com/auth/devstorage.full_control'

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    Returns:
      A tuple (superclass state, local state dict). The local dict carries
      the instance's api_url so an overridden URL survives pickling.
    """
    return (super(_StorageApi, self).__getstate__(), {'api_url': self.api_url})

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the tuple from a __getstate__ call
    """
    superstate, localstate = state
    super(_StorageApi, self).__setstate__(superstate)
    self.api_url = localstate['api_url']

  @api_utils._eager_tasklet
  @ndb.tasklet
  def do_request_async(self, url, method='GET', headers=None, payload=None,
                       deadline=None, callback=None):
    """Issue a request, adding the storage-specific default headers.

    The 'x-goog-api-version' header defaults to '2' when the caller did not
    supply one, and 'accept-encoding' is always set to 'gzip, *'. A
    urlfetch.DownloadError raised by the underlying request is translated
    into errors.TimeoutError.

    Args:
      url: Full URL to request.
      method: HTTP method name.
      headers: Optional mapping of request headers. It is copied, so the
        caller's object is never modified.
      payload: Optional request body.
      deadline: Optional deadline forwarded to the parent implementation.
      callback: Optional callback forwarded to the parent implementation.

    Returns:
      (Via ndb.Return) whatever the parent do_request_async yields.

    Raises:
      errors.TimeoutError: if the underlying fetch raised
        urlfetch.DownloadError.
    """
    # Work on a private copy: callers may reuse their headers dict for other
    # requests and should not see our defaults leak into it.
    headers = {} if headers is None else dict(headers)
    headers.setdefault('x-goog-api-version', '2')
    headers['accept-encoding'] = 'gzip, *'
    try:
      resp_tuple = yield super(_StorageApi, self).do_request_async(
          url, method=method, headers=headers, payload=payload,
          deadline=deadline, callback=callback)
    except urlfetch.DownloadError as e:
      raise errors.TimeoutError(
          'Request to Google Cloud Storage timed out.', e)

    raise ndb.Return(resp_tuple)

  def post_object_async(self, path, **kwds):
    """POST to an object."""
    return self.do_request_async(self.api_url + path, 'POST', **kwds)

  def put_object_async(self, path, **kwds):
    """PUT an object."""
    return self.do_request_async(self.api_url + path, 'PUT', **kwds)

  def get_object_async(self, path, **kwds):
    """GET an object.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'GET', **kwds)

  def delete_object_async(self, path, **kwds):
    """DELETE an object.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'DELETE', **kwds)

  def head_object_async(self, path, **kwds):
    """HEAD an object.

    Depending on request headers, HEAD returns various object properties,
    e.g. Content-Length, Last-Modified, and ETag.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'HEAD', **kwds)

  def get_bucket_async(self, path, **kwds):
    """GET a bucket."""
    return self.do_request_async(self.api_url + path, 'GET', **kwds)


# NOTE(review): add_sync_methods presumably attaches blocking counterparts of
# the *_async methods (ReadBuffer.__init__ calls api.head_object, which is not
# defined above) -- confirm against rest_api.
_StorageApi = rest_api.add_sync_methods(_StorageApi)
174b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
175b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
176b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikclass ReadBuffer(object):
177b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  """A class for reading Google storage files."""
178b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
179b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  DEFAULT_BUFFER_SIZE = 1024 * 1024
180b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  MAX_REQUEST_SIZE = 30 * DEFAULT_BUFFER_SIZE
181b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def __init__(self,
             api,
             path,
             buffer_size=DEFAULT_BUFFER_SIZE,
             max_request_size=MAX_REQUEST_SIZE):
  """Open a GCS object for buffered reading.

  Args:
    api: A StorageApi instance.
    path: Quoted/escaped path to the object, e.g. /mybucket/myfile
    buffer_size: Size of the read buffer. The ReadBuffer keeps one
      buffer, but a pending future may hold a second one. Must be no
      greater than max_request_size.
    max_request_size: Max bytes to request in one urlfetch.
  """
  self._api = api
  self._path = path
  self.name = api_utils._unquote_filename(path)
  self.closed = False

  assert buffer_size <= max_request_size
  self._buffer_size = buffer_size
  self._max_request_size = max_request_size
  self._offset = 0
  self._buffer = _Buffer()
  self._etag = None

  # Start fetching the first segment before the blocking HEAD call below;
  # its result (and its deferred response check) is collected afterwards.
  first_segment_future = self._get_segment(
      0, self._buffer_size, check_response=False)

  status, headers, content = self._api.head_object(path)
  errors.check_status(status, [200], path, resp_headers=headers, body=content)
  self._file_size = long(common.get_stored_content_length(headers))
  self._check_etag(headers.get('etag'))

  self._buffer_future = None

  # For an empty object the result of the first fetch is never collected.
  if self._file_size == 0:
    return

  first_chunk, verify_response = first_segment_future.get_result()
  verify_response()
  self._buffer.reset(first_chunk)
  self._request_next_buffer()
223b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
224b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __getstate__(self):
225b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Store state as part of serialization/pickling.
226b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
227b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    The contents of the read buffer are not stored, only the current offset for
228b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    data read by the client. A new read buffer is established at unpickling.
229b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    The head information for the object (file size and etag) are stored to
230b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    reduce startup and ensure the file has not changed.
231b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
232b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
233b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      A dictionary with the state of this object
234b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
235b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return {'api': self._api,
236b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'path': self._path,
237b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'buffer_size': self._buffer_size,
238b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'request_size': self._max_request_size,
239b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'etag': self._etag,
240b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'size': self._file_size,
241b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'offset': self._offset,
242b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'closed': self.closed}
243b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def __setstate__(self, state):
  """Rebuild this reader from a __getstate__ dictionary.

  Besides restoring the saved fields, an empty buffer is created and, if
  the reader is open with unread data left, the next buffer is requested.

  Args:
    state: the dictionary from a __getstate__ call
  """
  self._api = state['api']
  self._path = state['path']
  self.name = api_utils._unquote_filename(self._path)
  self._buffer_size = state['buffer_size']
  self._max_request_size = state['request_size']
  self._etag = state['etag']
  self._file_size = state['size']
  self._offset = state['offset']
  # Buffer contents are never pickled, so always start from an empty one.
  self._buffer = _Buffer()
  self.closed = state['closed']
  self._buffer_future = None

  unread = self._remaining()
  if unread and not self.closed:
    self._request_next_buffer()
265b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
266b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __iter__(self):
267b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Iterator interface.
268b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
269b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Note the ReadBuffer container itself is the iterator. It's
270b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    (quote PEP0234)
271b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    'destructive: they consumes all the values and a second iterator
272b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    cannot easily be created that iterates independently over the same values.
273b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    You could open the file for the second time, or seek() to the beginning.'
274b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
275b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
276b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      Self.
277b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
278b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return self
279b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
280b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def next(self):
281b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    line = self.readline()
282b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not line:
283b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise StopIteration()
284b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return line
285b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
286b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def readline(self, size=-1):
287b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Read one line delimited by '\n' from the file.
288b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
289b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    A trailing newline character is kept in the string. It may be absent when a
290b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    file ends with an incomplete line. If the size argument is non-negative,
291b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    it specifies the maximum string size (counting the newline) to return.
292b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    A negative size is the same as unspecified. Empty string is returned
293b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    only when EOF is encountered immediately.
294b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
295b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
296b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      size: Maximum number of bytes to read. If not specified, readline stops
297b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        only on '\n' or EOF.
298b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
299b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
300b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      The data read as a string.
301b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
302b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
303b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      IOError: When this buffer is closed.
304b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
305b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._check_open()
306b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if size == 0 or not self._remaining():
307b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return ''
308b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
309b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    data_list = []
310b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    newline_offset = self._buffer.find_newline(size)
311b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    while newline_offset < 0:
312b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      data = self._buffer.read(size)
313b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      size -= len(data)
314b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._offset += len(data)
315b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      data_list.append(data)
316b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      if size == 0 or not self._remaining():
317b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        return ''.join(data_list)
318b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._buffer.reset(self._buffer_future.get_result())
319b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._request_next_buffer()
320b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      newline_offset = self._buffer.find_newline(size)
321b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
322b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    data = self._buffer.read_to_offset(newline_offset + 1)
323b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset += len(data)
324b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    data_list.append(data)
325b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
326b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return ''.join(data_list)
327b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
328b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def read(self, size=-1):
329b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Read data from RAW file.
330b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
331b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
332b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      size: Number of bytes to read as integer. Actual number of bytes
333b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        read is always equal to size unless EOF is reached. If size is
334b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        negative or unspecified, read the entire file.
335b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
336b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
337b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      data read as str.
338b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
339b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
340b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      IOError: When this buffer is closed.
341b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
342b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._check_open()
343b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not self._remaining():
344b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return ''
345b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
346b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    data_list = []
347b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    while True:
348b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      remaining = self._buffer.remaining()
349b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      if size >= 0 and size < remaining:
350b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        data_list.append(self._buffer.read(size))
351b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._offset += size
352b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        break
353b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      else:
354b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        size -= remaining
355b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._offset += remaining
356b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        data_list.append(self._buffer.read())
357b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
358b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        if self._buffer_future is None:
359b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          if size < 0 or size >= self._remaining():
360b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            needs = self._remaining()
361b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          else:
362b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            needs = size
363b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          data_list.extend(self._get_segments(self._offset, needs))
364b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          self._offset += needs
365b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          break
366b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
367b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        if self._buffer_future:
368b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          self._buffer.reset(self._buffer_future.get_result())
369b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          self._buffer_future = None
370b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
371b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if self._buffer_future is None:
372b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._request_next_buffer()
373b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return ''.join(data_list)
374b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
375b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _remaining(self):
376b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return self._file_size - self._offset
377b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
378b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _request_next_buffer(self):
379b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Request next buffer.
380b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
381b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Requires self._offset and self._buffer are in consistent state.
382b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
383b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer_future = None
384b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    next_offset = self._offset + self._buffer.remaining()
385b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if next_offset != self._file_size:
386b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._buffer_future = self._get_segment(next_offset,
387b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                                              self._buffer_size)
388b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
389b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _get_segments(self, start, request_size):
390b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Get segments of the file from Google Storage as a list.
391b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
392b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    A large request is broken into segments to avoid hitting urlfetch
393b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    response size limit. Each segment is returned from a separate urlfetch.
394b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
395b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
396b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      start: start offset to request. Inclusive. Have to be within the
397b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        range of the file.
398b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      request_size: number of bytes to request.
399b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
400b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
401b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      A list of file segments in order
402b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
403b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not request_size:
404b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return []
405b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
406b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    end = start + request_size
407b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    futures = []
408b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
409b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    while request_size > self._max_request_size:
410b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      futures.append(self._get_segment(start, self._max_request_size))
411b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      request_size -= self._max_request_size
412b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      start += self._max_request_size
413b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if start < end:
414b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      futures.append(self._get_segment(start, end-start))
415b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return [fut.get_result() for fut in futures]
416b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
417b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  @ndb.tasklet
418b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _get_segment(self, start, request_size, check_response=True):
419b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Get a segment of the file from Google Storage.
420b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
421b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
422b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      start: start offset of the segment. Inclusive. Have to be within the
423b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        range of the file.
424b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      request_size: number of bytes to request. Have to be small enough
425b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        for a single urlfetch request. May go over the logical range of the
426b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        file.
427b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      check_response: True to check the validity of GCS response automatically
428b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        before the future returns. False otherwise. See Yields section.
429b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
430b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Yields:
431b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      If check_response is True, the segment [start, start + request_size)
432b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      of the file.
433b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      Otherwise, a tuple. The first element is the unverified file segment.
434b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      The second element is a closure that checks response. Caller should
435b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      first invoke the closure before consuing the file segment.
436b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
437b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
438b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      ValueError: if the file has changed while reading.
439b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
440b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    end = start + request_size - 1
441b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    content_range = '%d-%d' % (start, end)
442b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    headers = {'Range': 'bytes=' + content_range}
443b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    status, resp_headers, content = yield self._api.get_object_async(
444b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._path, headers=headers)
445b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    def _checker():
446b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      errors.check_status(status, [200, 206], self._path, headers,
447b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                          resp_headers, body=content)
448b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._check_etag(resp_headers.get('etag'))
449b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if check_response:
450b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      _checker()
451b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise ndb.Return(content)
452b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    raise ndb.Return(content, _checker)
453b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
454b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _check_etag(self, etag):
455b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Check if etag is the same across requests to GCS.
456b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
457b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    If self._etag is None, set it. If etag is set, check that the new
458b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    etag equals the old one.
459b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
460b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    In the __init__ method, we fire one HEAD and one GET request using
461b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    ndb tasklet. One of them would return first and set the first value.
462b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
463b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
464b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      etag: etag from a GCS HTTP response. None if etag is not part of the
465b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        response header. It could be None for example in the case of GCS
466b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        composite file.
467b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
468b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
469b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      ValueError: if two etags are not equal.
470b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
471b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if etag is None:
472b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return
473b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    elif self._etag is None:
474b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._etag = etag
475b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    elif self._etag != etag:
476b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise ValueError('File on GCS has changed while reading.')
477b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
478b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def close(self):
479b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.closed = True
480b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer = None
481b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer_future = None
482b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
  def __enter__(self):
    """Enter a with block; the buffer itself is the context value."""
    return self
485b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
486b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __exit__(self, atype, value, traceback):
487b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.close()
488b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return False
489b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
490b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def seek(self, offset, whence=os.SEEK_SET):
491b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Set the file's current offset.
492b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
493b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Note if the new offset is out of bound, it is adjusted to either 0 or EOF.
494b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
495b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
496b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      offset: seek offset as number.
497b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      whence: seek mode. Supported modes are os.SEEK_SET (absolute seek),
498b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        os.SEEK_CUR (seek relative to the current position), and os.SEEK_END
499b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        (seek relative to the end, offset should be negative).
500b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
501b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
502b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      IOError: When this buffer is closed.
503b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      ValueError: When whence is invalid.
504b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
505b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._check_open()
506b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
507b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer.reset()
508b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer_future = None
509b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
510b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if whence == os.SEEK_SET:
511b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._offset = offset
512b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    elif whence == os.SEEK_CUR:
513b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._offset += offset
514b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    elif whence == os.SEEK_END:
515b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._offset = self._file_size + offset
516b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    else:
517b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise ValueError('Whence mode %s is invalid.' % str(whence))
518b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
519b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset = min(self._offset, self._file_size)
520b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset = max(self._offset, 0)
521b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if self._remaining():
522b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._request_next_buffer()
523b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
524b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def tell(self):
525b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Tell the file's current offset.
526b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
527b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
528b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      current offset in reading this file.
529b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
530b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
531b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      IOError: When this buffer is closed.
532b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
533b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._check_open()
534b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return self._offset
535b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
536b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _check_open(self):
537b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if self.closed:
538b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise IOError('Buffer is closed.')
539b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
  def seekable(self):
    """Return True; this buffer provides seek()."""
    return True
542b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
  def readable(self):
    """Return True; this buffer is for reading."""
    return True
545b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
  def writable(self):
    """Return False; this buffer is reported as not writable."""
    return False
548b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
549b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
550b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikclass _Buffer(object):
551b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  """In memory buffer."""
552b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
553b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __init__(self):
554b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.reset()
555b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
556b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def reset(self, content='', offset=0):
557b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer = content
558b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset = offset
559b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
560b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def read(self, size=-1):
561b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Returns bytes from self._buffer and update related offsets.
562b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
563b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
564b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      size: number of bytes to read starting from current offset.
565b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        Read the entire buffer if negative.
566b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
567b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
568b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      Requested bytes from buffer.
569b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
570b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if size < 0:
571b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      offset = len(self._buffer)
572b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    else:
573b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      offset = self._offset + size
574b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return self.read_to_offset(offset)
575b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
576b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def read_to_offset(self, offset):
577b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Returns bytes from self._buffer and update related offsets.
578b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
579b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
580b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      offset: read from current offset to this offset, exclusive.
581b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
582b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
583b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      Requested bytes from buffer.
584b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
585b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    assert offset >= self._offset
586b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    result = self._buffer[self._offset: offset]
587b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset += len(result)
588b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return result
589b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
590b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def remaining(self):
591b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return len(self._buffer) - self._offset
592b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
593b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def find_newline(self, size=-1):
594b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Search for newline char in buffer starting from current offset.
595b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
596b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
597b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      size: number of bytes to search. -1 means all.
598b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
599b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
600b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      offset of newline char in buffer. -1 if doesn't exist.
601b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
602b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if size < 0:
603b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return self._buffer.find('\n', self._offset)
604b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return self._buffer.find('\n', self._offset, self._offset + size)
605b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
606b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
607b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikclass StreamingBuffer(object):
608b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  """A class for creating large objects using the 'resumable' API.
609b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
610b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  The API is a subset of the Python writable stream API sufficient to
611b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  support writing zip files using the zipfile module.
612b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
613b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  The exact sequence of calls and use of headers is documented at
614b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  https://developers.google.com/storage/docs/developer-guide#unknownresumables
615b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  """
616b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
  # Block size in bytes (256KB). Writes to GCS other than the final one
  # have to align on this boundary.
  _blocksize = 256 * 1024

  # write() triggers an internal flush once at least this many bytes are
  # buffered.
  _flushsize = 8 * _blocksize

  # Cap on the bytes gathered for a single request in _flush(). The
  # constructor asserts it is a multiple of _blocksize and >= _flushsize.
  _maxrequestsize = 9 * 4 * _blocksize
622b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
623b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __init__(self,
624b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik               api,
625b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik               path,
626b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik               content_type=None,
627b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik               gcs_headers=None):
628b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Constructor.
629b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
630b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
631b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      api: A StorageApi instance.
632b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
633b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      content_type: Optional content-type; Default value is
634b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        delegate to Google Cloud Storage.
635b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      gcs_headers: additional gs headers as a str->str dict, e.g
636b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
637b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
638b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      IOError: When this location can not be found.
639b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
640b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    assert self._maxrequestsize > self._blocksize
641b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    assert self._maxrequestsize % self._blocksize == 0
642b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    assert self._maxrequestsize >= self._flushsize
643b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
644b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._api = api
645b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._path = path
646b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
647b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.name = api_utils._unquote_filename(path)
648b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.closed = False
649b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
650b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer = collections.deque()
651b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffered = 0
652b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._written = 0
653b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset = 0
654b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
655b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    headers = {'x-goog-resumable': 'start'}
656b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if content_type:
657b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      headers['content-type'] = content_type
658b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if gcs_headers:
659b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      headers.update(gcs_headers)
660b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    status, resp_headers, content = self._api.post_object(path, headers=headers)
661b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    errors.check_status(status, [201], path, headers, resp_headers,
662b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                        body=content)
663b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    loc = resp_headers.get('location')
664b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not loc:
665b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise IOError('No location header found in 201 response')
666b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    parsed = urlparse.urlparse(loc)
667b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._path_with_token = '%s?%s' % (self._path, parsed.query)
668b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
669b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __getstate__(self):
670b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Store state as part of serialization/pickling.
671b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
672b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    The contents of the write buffer are stored. Writes to the underlying
673b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    storage are required to be on block boundaries (_blocksize) except for the
674b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    last write. In the worst case the pickled version of this object may be
675b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    slightly larger than the blocksize.
676b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
677b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
678b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      A dictionary with the state of this object
679b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
680b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
681b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return {'api': self._api,
682b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'path': self._path,
683b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'path_token': self._path_with_token,
684b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'buffer': self._buffer,
685b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'buffered': self._buffered,
686b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'written': self._written,
687b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'offset': self._offset,
688b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            'closed': self.closed}
689b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
690b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __setstate__(self, state):
691b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Restore state as part of deserialization/unpickling.
692b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
693b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
694b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      state: the dictionary from a __getstate__ call
695b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
696b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._api = state['api']
697b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._path_with_token = state['path_token']
698b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer = state['buffer']
699b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffered = state['buffered']
700b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._written = state['written']
701b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset = state['offset']
702b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.closed = state['closed']
703b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._path = state['path']
704b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.name = api_utils._unquote_filename(self._path)
705b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
706b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def write(self, data):
707b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Write some bytes.
708b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
709b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
710b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      data: data to write. str.
711b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
712b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Raises:
713b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      TypeError: if data is not of type str.
714b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
715b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._check_open()
716b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not isinstance(data, str):
717b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise TypeError('Expected str but got %s.' % type(data))
718b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not data:
719b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return
720b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffer.append(data)
721b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._buffered += len(data)
722b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._offset += len(data)
723b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if self._buffered >= self._flushsize:
724b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._flush()
725b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
726b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def flush(self):
727b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Flush as much as possible to GCS.
728b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
729b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    GCS *requires* that all writes except for the final one align on
730b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    256KB boundaries. So the internal buffer may still have < 256KB bytes left
731b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    after flush.
732b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
733b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._check_open()
734b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._flush(finish=False)
735b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
736b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def tell(self):
737b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Return the total number of bytes passed to write() so far.
738b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
739b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    (There is no seek() method.)
740b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
741b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return self._offset
742b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
743b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def close(self):
744b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Flush the buffer and finalize the file.
745b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
746b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    When this returns the new file is available for reading.
747b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
748b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if not self.closed:
749b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self.closed = True
750b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._flush(finish=True)
751b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._buffer = None
752b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
  def __enter__(self):
    """Enter a with block; the buffer itself is the context value."""
    return self
755b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
756b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def __exit__(self, atype, value, traceback):
757b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self.close()
758b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return False
759b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
760b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _flush(self, finish=False):
761b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Internal API to flush.
762b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
763b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Buffer is flushed to GCS only when the total amount of buffered data is at
764b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    least self._blocksize, or to flush the final (incomplete) block of
765b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    the file with finish=True.
766b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
767b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    while ((finish and self._buffered >= 0) or
768b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik           (not finish and self._buffered >= self._blocksize)):
769b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      tmp_buffer = []
770b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      tmp_buffer_len = 0
771b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
772b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      excess = 0
773b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      while self._buffer:
774b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        buf = self._buffer.popleft()
775b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        size = len(buf)
776b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._buffered -= size
777b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        tmp_buffer.append(buf)
778b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        tmp_buffer_len += size
779b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        if tmp_buffer_len >= self._maxrequestsize:
780b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          excess = tmp_buffer_len - self._maxrequestsize
781b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          break
782b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        if not finish and (
783b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            tmp_buffer_len % self._blocksize + self._buffered <
784b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik            self._blocksize):
785b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          excess = tmp_buffer_len % self._blocksize
786b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          break
787b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
788b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      if excess:
789b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        over = tmp_buffer.pop()
790b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        size = len(over)
791b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        assert size >= excess
792b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        tmp_buffer_len -= size
793b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        head, tail = over[:-excess], over[-excess:]
794b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._buffer.appendleft(tail)
795b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._buffered += len(tail)
796b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        if head:
797b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          tmp_buffer.append(head)
798b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik          tmp_buffer_len += len(head)
799b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
800b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      data = ''.join(tmp_buffer)
801b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      file_len = '*'
802b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      if finish and not self._buffered:
803b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        file_len = self._written + len(data)
804b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._send_data(data, self._written, file_len)
805b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      self._written += len(data)
806b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      if file_len != '*':
807b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        break
808b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
809b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _send_data(self, data, start_offset, file_len):
810b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Send the block to the storage service.
811b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
812b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    This is a utility method that does not modify self.
813b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
814b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
815b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      data: data to send in str.
816b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      start_offset: start offset of the data in relation to the file.
817b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      file_len: an int if this is the last data to append to the file.
818b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        Otherwise '*'.
819b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
820b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    headers = {}
821b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    end_offset = start_offset + len(data) - 1
822b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
823b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if data:
824b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      headers['content-range'] = ('bytes %d-%d/%s' %
825b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                                  (start_offset, end_offset, file_len))
826b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    else:
827b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      headers['content-range'] = ('bytes */%s' % file_len)
828b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
829b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    status, response_headers, content = self._api.put_object(
830b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._path_with_token, payload=data, headers=headers)
831b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if file_len == '*':
832b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      expected = 308
833b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    else:
834b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      expected = 200
835b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    errors.check_status(status, [expected], self._path, headers,
836b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                        response_headers, content,
837b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                        {'upload_path': self._path_with_token})
838b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
839b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _get_offset_from_gcs(self):
840b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Get the last offset that has been written to GCS.
841b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
842b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    This is a utility method that does not modify self.
843b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
844b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Returns:
845b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      an int of the last offset written to GCS by this upload, inclusive.
846b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      -1 means nothing has been written.
847b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
848b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    headers = {'content-range': 'bytes */*'}
849b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    status, response_headers, content = self._api.put_object(
850b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        self._path_with_token, headers=headers)
851b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    errors.check_status(status, [308], self._path, headers,
852b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                        response_headers, content,
853b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik                        {'upload_path': self._path_with_token})
854b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    val = response_headers.get('range')
855b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if val is None:
856b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      return -1
857b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    _, offset = val.rsplit('-', 1)
858b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return int(offset)
859b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
860b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _force_close(self, file_length=None):
861b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """Close this buffer on file_length.
862b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
863b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Finalize this upload immediately on file_length.
864b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Contents that are still in memory will not be uploaded.
865b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
866b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    This is a utility method that does not modify self.
867b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
868b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    Args:
869b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      file_length: file length. Must match what has been uploaded. If None,
870b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        it will be queried from GCS.
871b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    """
872b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if file_length is None:
873b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      file_length = self._get_offset_from_gcs() + 1
874b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    self._send_data('', 0, file_length)
875b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
876b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def _check_open(self):
877b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if self.closed:
878b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik      raise IOError('Buffer is closed.')
879b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
880b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def seekable(self):
881b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return False
882b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
883b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def readable(self):
884b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return False
885b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
886b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik  def writable(self):
887b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    return True
888