http_fs_node.cc revision 116680a4aac90f2aa7413d9095a592090648e557
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include "nacl_io/httpfs/http_fs_node.h"

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <string>

#include <ppapi/c/pp_errors.h>

#include "nacl_io/httpfs/http_fs.h"
#include "nacl_io/kernel_handle.h"
#include "nacl_io/osinttypes.h"
17
18#if defined(WIN32)
19#define snprintf _snprintf
20#endif
21
22namespace nacl_io {
23
24namespace {
25
// A ranged read may be answered with the complete entity. In that case the
// bytes that precede the requested offset are read into a scratch buffer and
// thrown away; this is the upper bound on that scratch buffer's size.
const int MAX_READ_BUFFER_SIZE = 1 << 16;

// HTTP status codes that are given special treatment in this file.
const int32_t STATUSCODE_OK = 200;
const int32_t STATUSCODE_PARTIAL_CONTENT = 206;
const int32_t STATUSCODE_FORBIDDEN = 403;
const int32_t STATUSCODE_NOT_FOUND = 404;
const int32_t STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE = 416;
35
36StringMap_t ParseHeaders(const char* headers, int32_t headers_length) {
37  enum State {
38    FINDING_KEY,
39    SKIPPING_WHITESPACE,
40    FINDING_VALUE,
41  };
42
43  StringMap_t result;
44  std::string key;
45  std::string value;
46
47  State state = FINDING_KEY;
48  const char* start = headers;
49  for (int i = 0; i < headers_length; ++i) {
50    switch (state) {
51      case FINDING_KEY:
52        if (headers[i] == ':') {
53          // Found key.
54          key.assign(start, &headers[i] - start);
55          key = NormalizeHeaderKey(key);
56          state = SKIPPING_WHITESPACE;
57        }
58        break;
59
60      case SKIPPING_WHITESPACE:
61        if (headers[i] == ' ') {
62          // Found whitespace, keep going...
63          break;
64        }
65
66        // Found a non-whitespace, mark this as the start of the value.
67        start = &headers[i];
68        state = FINDING_VALUE;
69      // Fallthrough to start processing value without incrementing i.
70
71      case FINDING_VALUE:
72        if (headers[i] == '\n') {
73          // Found value.
74          value.assign(start, &headers[i] - start);
75          result[key] = value;
76          start = &headers[i + 1];
77          state = FINDING_KEY;
78        }
79        break;
80    }
81  }
82
83  return result;
84}
85
86bool ParseContentLength(const StringMap_t& headers, off_t* content_length) {
87  StringMap_t::const_iterator iter = headers.find("Content-Length");
88  if (iter == headers.end())
89    return false;
90
91  *content_length = strtoull(iter->second.c_str(), NULL, 10);
92  return true;
93}
94
95bool ParseContentRange(const StringMap_t& headers,
96                       off_t* read_start,
97                       off_t* read_end,
98                       off_t* entity_length) {
99  StringMap_t::const_iterator iter = headers.find("Content-Range");
100  if (iter == headers.end())
101    return false;
102
103  // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last
104  // value is the entity length, which can potentially be * (i.e. unknown).
105  off_t read_start_int;
106  off_t read_end_int;
107  off_t entity_length_int;
108  int result = sscanf(iter->second.c_str(),
109                      "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64,
110                      &read_start_int,
111                      &read_end_int,
112                      &entity_length_int);
113
114  // The Content-Range header specifies an inclusive range: e.g. the first ten
115  // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing
116  // read_end.
117  if (result == 2) {
118    if (read_start)
119      *read_start = read_start_int;
120    if (read_end)
121      *read_end = read_end_int + 1;
122    if (entity_length)
123      *entity_length = 0;
124    return true;
125  } else if (result == 3) {
126    if (read_start)
127      *read_start = read_start_int;
128    if (read_end)
129      *read_end = read_end_int + 1;
130    if (entity_length)
131      *entity_length = entity_length_int;
132    return true;
133  }
134
135  return false;
136}
137
138// Maps an HTTP |status_code| onto the appropriate errno code.
139int HTTPStatusCodeToErrno(int status_code) {
140  switch (status_code) {
141    case STATUSCODE_OK:
142    case STATUSCODE_PARTIAL_CONTENT:
143      return 0;
144    case STATUSCODE_FORBIDDEN:
145      return EACCES;
146    case STATUSCODE_NOT_FOUND:
147      return ENOENT;
148  }
149  if (status_code >= 400 && status_code < 500)
150    return EINVAL;
151  return EIO;
152}
153
154}  // namespace
155
156void HttpFsNode::SetCachedSize(off_t size) {
157  has_cached_size_ = true;
158  stat_.st_size = size;
159}
160
// Always fails with EACCES; nothing is flushed.
Error HttpFsNode::FSync() {
  return EACCES;
}
164
// Directory listing is not provided by this node: |*out_bytes| is set to 0 and
// EACCES is returned. |offs|, |pdir| and |count| are ignored.
Error HttpFsNode::GetDents(size_t offs,
                           struct dirent* pdir,
                           size_t count,
                           int* out_bytes) {
  *out_bytes = 0;
  return EACCES;
}
172
173Error HttpFsNode::GetStat(struct stat* stat) {
174  AUTO_LOCK(node_lock_);
175  return GetStat_Locked(stat);
176}
177
178Error HttpFsNode::Read(const HandleAttr& attr,
179                       void* buf,
180                       size_t count,
181                       int* out_bytes) {
182  *out_bytes = 0;
183
184  AUTO_LOCK(node_lock_);
185  if (cache_content_) {
186    if (cached_data_.empty()) {
187      Error error = DownloadToCache();
188      if (error)
189        return error;
190    }
191
192    return ReadPartialFromCache(attr, buf, count, out_bytes);
193  }
194
195  return DownloadPartial(attr, buf, count, out_bytes);
196}
197
// Truncation is not supported: always returns EACCES and ignores |size|.
Error HttpFsNode::FTruncate(off_t size) {
  return EACCES;
}
201
// Writing is not supported: |*out_bytes| is set to 0 and EACCES is returned.
// |attr|, |buf| and |count| are ignored.
Error HttpFsNode::Write(const HandleAttr& attr,
                        const void* buf,
                        size_t count,
                        int* out_bytes) {
  // TODO(binji): support POST?
  *out_bytes = 0;
  return EACCES;
}
210
211Error HttpFsNode::GetSize(off_t* out_size) {
212  *out_size = 0;
213
214  // TODO(binji): This value should be cached properly; i.e. obey the caching
215  // headers returned by the server.
216  AUTO_LOCK(node_lock_);
217  struct stat statbuf;
218  Error error = GetStat_Locked(&statbuf);
219  if (error)
220    return error;
221
222  *out_size = stat_.st_size;
223  return 0;
224}
225
// Creates a node belonging to |filesystem| that is backed by |url|.
// |cache_content| is stored in cache_content_, which Read() consults to
// choose between serving from an in-memory copy and issuing ranged
// downloads. The node starts out with no cached size.
HttpFsNode::HttpFsNode(Filesystem* filesystem,
                       const std::string& url,
                       bool cache_content)
    : Node(filesystem),
      url_(url),
      cache_content_(cache_content),
      has_cached_size_(false) {
}
234
// Overwrites the node's st_mode with |mode|.
void HttpFsNode::SetMode(int mode) {
  stat_.st_mode = mode;
}
238
239Error HttpFsNode::GetStat_Locked(struct stat* stat) {
240  // Assume we need to 'HEAD' if we do not know the size, otherwise, assume
241  // that the information is constant.  We can add a timeout if needed.
242  HttpFs* filesystem = static_cast<HttpFs*>(filesystem_);
243  if (!has_cached_size_ || !filesystem->cache_stat_) {
244    StringMap_t headers;
245    ScopedResource loader(filesystem_->ppapi());
246    ScopedResource request(filesystem_->ppapi());
247    ScopedResource response(filesystem_->ppapi());
248    int32_t statuscode;
249    StringMap_t response_headers;
250    const char* method = "HEAD";
251
252    if (filesystem->is_blob_url_) {
253      // Blob URLs do not support HEAD requests, but do give the content length
254      // in their response headers. We issue a single-byte GET request to
255      // retrieve the content length.
256      method = "GET";
257      headers["Range"] = "bytes=0-0";
258    }
259
260    Error error = OpenUrl(method,
261                          &headers,
262                          &loader,
263                          &request,
264                          &response,
265                          &statuscode,
266                          &response_headers);
267    if (error)
268      return error;
269
270    off_t entity_length;
271    if (ParseContentRange(response_headers, NULL, NULL, &entity_length)) {
272      SetCachedSize(static_cast<off_t>(entity_length));
273    } else if (ParseContentLength(response_headers, &entity_length)) {
274      SetCachedSize(static_cast<off_t>(entity_length));
275    } else if (cache_content_) {
276      // The server didn't give a content length; download the data to memory
277      // via DownloadToCache, which will also set stat_.st_size;
278      error = DownloadToCache();
279      if (error)
280        return error;
281    } else {
282      // The user doesn't want to cache content, but we didn't get a
283      // "Content-Length" header. Read the entire entity, and throw it away.
284      // Don't use DownloadToCache, as that will still allocate enough memory
285      // for the entire entity.
286      off_t bytes_read;
287      error = DownloadToTemp(&bytes_read);
288      if (error)
289        return error;
290
291      SetCachedSize(bytes_read);
292    }
293
294    stat_.st_atime = 0;  // TODO(binji): Use "Last-Modified".
295    stat_.st_mtime = 0;
296    stat_.st_ctime = 0;
297
298    stat_.st_mode |= S_IFREG;
299  }
300
301  // Fill the stat structure if provided
302  if (stat)
303    *stat = stat_;
304
305  return 0;
306}
307
308Error HttpFsNode::OpenUrl(const char* method,
309                          StringMap_t* request_headers,
310                          ScopedResource* out_loader,
311                          ScopedResource* out_request,
312                          ScopedResource* out_response,
313                          int32_t* out_statuscode,
314                          StringMap_t* out_response_headers) {
315  // Clear all out parameters.
316  *out_statuscode = 0;
317  out_response_headers->clear();
318
319  // Assume lock_ is already held.
320  PepperInterface* ppapi = filesystem_->ppapi();
321
322  HttpFs* mount_http = static_cast<HttpFs*>(filesystem_);
323  out_request->Reset(
324      mount_http->MakeUrlRequestInfo(url_, method, request_headers));
325  if (!out_request->pp_resource())
326    return EINVAL;
327
328  URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
329  URLResponseInfoInterface* response_interface =
330      ppapi->GetURLResponseInfoInterface();
331  VarInterface* var_interface = ppapi->GetVarInterface();
332
333  out_loader->Reset(loader_interface->Create(ppapi->GetInstance()));
334  if (!out_loader->pp_resource())
335    return EINVAL;
336
337  int32_t result = loader_interface->Open(out_loader->pp_resource(),
338                                          out_request->pp_resource(),
339                                          PP_BlockUntilComplete());
340  if (result != PP_OK)
341    return PPErrorToErrno(result);
342
343  out_response->Reset(
344      loader_interface->GetResponseInfo(out_loader->pp_resource()));
345  if (!out_response->pp_resource())
346    return EINVAL;
347
348  // Get response statuscode.
349  PP_Var statuscode = response_interface->GetProperty(
350      out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE);
351
352  if (statuscode.type != PP_VARTYPE_INT32)
353    return EINVAL;
354
355  *out_statuscode = statuscode.value.as_int;
356
357  // Only accept OK or Partial Content.
358  Error error = HTTPStatusCodeToErrno(*out_statuscode);
359  if (error)
360    return error;
361
362  // Get response headers.
363  PP_Var response_headers_var = response_interface->GetProperty(
364      out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS);
365
366  uint32_t response_headers_length;
367  const char* response_headers_str =
368      var_interface->VarToUtf8(response_headers_var, &response_headers_length);
369
370  *out_response_headers =
371      ParseHeaders(response_headers_str, response_headers_length);
372
373  var_interface->Release(response_headers_var);
374
375  return 0;
376}
377
378Error HttpFsNode::DownloadToCache() {
379  StringMap_t headers;
380  ScopedResource loader(filesystem_->ppapi());
381  ScopedResource request(filesystem_->ppapi());
382  ScopedResource response(filesystem_->ppapi());
383  int32_t statuscode;
384  StringMap_t response_headers;
385  Error error = OpenUrl("GET",
386                        &headers,
387                        &loader,
388                        &request,
389                        &response,
390                        &statuscode,
391                        &response_headers);
392  if (error)
393    return error;
394
395  off_t content_length = 0;
396  if (ParseContentLength(response_headers, &content_length)) {
397    cached_data_.resize(content_length);
398    int real_size;
399    error = ReadResponseToBuffer(
400        loader, cached_data_.data(), content_length, &real_size);
401    if (error)
402      return error;
403
404    SetCachedSize(real_size);
405    cached_data_.resize(real_size);
406    return 0;
407  }
408
409  int bytes_read;
410  error = ReadEntireResponseToCache(loader, &bytes_read);
411  if (error)
412    return error;
413
414  SetCachedSize(bytes_read);
415  return 0;
416}
417
418Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr,
419                                       void* buf,
420                                       int count,
421                                       int* out_bytes) {
422  *out_bytes = 0;
423  off_t size = cached_data_.size();
424
425  if (attr.offs + count > size)
426    count = size - attr.offs;
427
428  if (count <= 0)
429    return 0;
430
431  memcpy(buf, &cached_data_.data()[attr.offs], count);
432  *out_bytes = count;
433  return 0;
434}
435
436Error HttpFsNode::DownloadPartial(const HandleAttr& attr,
437                                  void* buf,
438                                  off_t count,
439                                  int* out_bytes) {
440  *out_bytes = 0;
441
442  StringMap_t headers;
443
444  char buffer[100];
445  // Range request is inclusive: 0-99 returns 100 bytes.
446  snprintf(&buffer[0],
447           sizeof(buffer),
448           "bytes=%" PRIi64 "-%" PRIi64,
449           attr.offs,
450           attr.offs + count - 1);
451  headers["Range"] = buffer;
452
453  ScopedResource loader(filesystem_->ppapi());
454  ScopedResource request(filesystem_->ppapi());
455  ScopedResource response(filesystem_->ppapi());
456  int32_t statuscode;
457  StringMap_t response_headers;
458  Error error = OpenUrl("GET",
459                        &headers,
460                        &loader,
461                        &request,
462                        &response,
463                        &statuscode,
464                        &response_headers);
465  if (error) {
466    if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) {
467      // We're likely trying to read past the end. Return 0 bytes.
468      *out_bytes = 0;
469      return 0;
470    }
471
472    return error;
473  }
474
475  off_t read_start = 0;
476  if (statuscode == STATUSCODE_OK) {
477    // No partial result, read everything starting from the part we care about.
478    off_t content_length;
479    if (ParseContentLength(response_headers, &content_length)) {
480      if (attr.offs >= content_length)
481        return EINVAL;
482
483      // Clamp count, if trying to read past the end of the file.
484      if (attr.offs + count > content_length) {
485        count = content_length - attr.offs;
486      }
487    }
488  } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) {
489    // Determine from the headers where we are reading.
490    off_t read_end;
491    off_t entity_length;
492    if (ParseContentRange(
493            response_headers, &read_start, &read_end, &entity_length)) {
494      if (read_start > attr.offs || read_start > read_end) {
495        // If this error occurs, the server is returning bogus values.
496        return EINVAL;
497      }
498
499      // Clamp count, if trying to read past the end of the file.
500      count = std::min(read_end - read_start, count);
501    } else {
502      // Partial Content without Content-Range. Assume that the server gave us
503      // exactly what we asked for. This can happen even when the server
504      // returns 200 -- the cache may return 206 in this case, but not modify
505      // the headers.
506      read_start = attr.offs;
507    }
508  }
509
510  if (read_start < attr.offs) {
511    // We aren't yet at the location where we want to start reading. Read into
512    // our dummy buffer until then.
513    int bytes_to_read = attr.offs - read_start;
514    int bytes_read;
515    error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read);
516    if (error)
517      return error;
518
519    // Tried to read past the end of the entity.
520    if (bytes_read < bytes_to_read) {
521      *out_bytes = 0;
522      return 0;
523    }
524  }
525
526  return ReadResponseToBuffer(loader, buf, count, out_bytes);
527}
528
529Error HttpFsNode::DownloadToTemp(off_t* out_bytes) {
530  StringMap_t headers;
531  ScopedResource loader(filesystem_->ppapi());
532  ScopedResource request(filesystem_->ppapi());
533  ScopedResource response(filesystem_->ppapi());
534  int32_t statuscode;
535  StringMap_t response_headers;
536  Error error = OpenUrl("GET",
537                        &headers,
538                        &loader,
539                        &request,
540                        &response,
541                        &statuscode,
542                        &response_headers);
543  if (error)
544    return error;
545
546  off_t content_length = 0;
547  if (ParseContentLength(response_headers, &content_length)) {
548    *out_bytes = content_length;
549    return 0;
550  }
551
552  return ReadEntireResponseToTemp(loader, out_bytes);
553}
554
555Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader,
556                                           off_t* out_bytes) {
557  *out_bytes = 0;
558
559  const int kBytesToRead = MAX_READ_BUFFER_SIZE;
560  buffer_.resize(kBytesToRead);
561
562  while (true) {
563    int bytes_read;
564    Error error =
565        ReadResponseToBuffer(loader, buffer_.data(), kBytesToRead, &bytes_read);
566    if (error)
567      return error;
568
569    *out_bytes += bytes_read;
570
571    if (bytes_read < kBytesToRead)
572      return 0;
573  }
574}
575
576Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader,
577                                            int* out_bytes) {
578  *out_bytes = 0;
579  const int kBytesToRead = MAX_READ_BUFFER_SIZE;
580
581  while (true) {
582    // Always recalculate the buf pointer because it may have moved when
583    // cached_data_ was resized.
584    cached_data_.resize(*out_bytes + kBytesToRead);
585    void* buf = cached_data_.data() + *out_bytes;
586
587    int bytes_read;
588    Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read);
589    if (error)
590      return error;
591
592    *out_bytes += bytes_read;
593
594    if (bytes_read < kBytesToRead) {
595      // Shrink the cached data buffer to the correct size.
596      cached_data_.resize(*out_bytes);
597      return 0;
598    }
599  }
600}
601
602Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader,
603                                     int count,
604                                     int* out_bytes) {
605  *out_bytes = 0;
606
607  if (buffer_.size() < static_cast<size_t>(count))
608    buffer_.resize(std::min(count, MAX_READ_BUFFER_SIZE));
609
610  int bytes_left = count;
611  while (bytes_left > 0) {
612    int bytes_to_read =
613        std::min(static_cast<size_t>(bytes_left), buffer_.size());
614    int bytes_read;
615    Error error = ReadResponseToBuffer(
616        loader, buffer_.data(), bytes_to_read, &bytes_read);
617    if (error)
618      return error;
619
620    if (bytes_read == 0)
621      return 0;
622
623    bytes_left -= bytes_read;
624    *out_bytes += bytes_read;
625  }
626
627  return 0;
628}
629
630Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader,
631                                       void* buf,
632                                       int count,
633                                       int* out_bytes) {
634  *out_bytes = 0;
635
636  PepperInterface* ppapi = filesystem_->ppapi();
637  URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
638
639  char* out_buffer = static_cast<char*>(buf);
640  int bytes_to_read = count;
641  while (bytes_to_read > 0) {
642    int bytes_read =
643        loader_interface->ReadResponseBody(loader.pp_resource(),
644                                           out_buffer,
645                                           bytes_to_read,
646                                           PP_BlockUntilComplete());
647
648    if (bytes_read == 0) {
649      // This is not an error -- it may just be that we were trying to read
650      // more data than exists.
651      *out_bytes = count - bytes_to_read;
652      return 0;
653    }
654
655    if (bytes_read < 0)
656      return PPErrorToErrno(bytes_read);
657
658    assert(bytes_read <= bytes_to_read);
659    bytes_to_read -= bytes_read;
660    out_buffer += bytes_read;
661  }
662
663  *out_bytes = count;
664  return 0;
665}
666
667}  // namespace nacl_io
668