1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include "nacl_io/httpfs/http_fs_node.h"

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include <ppapi/c/pp_errors.h>

#include "nacl_io/httpfs/http_fs.h"
#include "nacl_io/kernel_handle.h"
#include "nacl_io/osinttypes.h"
17
18#if defined(WIN32)
19#define snprintf _snprintf
20#endif
21
22namespace nacl_io {
23
24namespace {
25
// Upper bound (64 KiB) on the size of the scratch buffer used to discard
// response data. When a partial read is requested but the server answers with
// the full entity, everything that precedes the requested offset is read into
// that scratch buffer and thrown away.
const int MAX_READ_BUFFER_SIZE = 1 << 16;

// HTTP status codes that this file handles explicitly.
const int32_t STATUSCODE_OK = 200;
const int32_t STATUSCODE_PARTIAL_CONTENT = 206;
const int32_t STATUSCODE_FORBIDDEN = 403;
const int32_t STATUSCODE_NOT_FOUND = 404;
const int32_t STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE = 416;
35
36StringMap_t ParseHeaders(const char* headers, int32_t headers_length) {
37  enum State {
38    FINDING_KEY,
39    SKIPPING_WHITESPACE,
40    FINDING_VALUE,
41  };
42
43  StringMap_t result;
44  std::string key;
45  std::string value;
46
47  State state = FINDING_KEY;
48  const char* start = headers;
49  for (int i = 0; i < headers_length; ++i) {
50    switch (state) {
51      case FINDING_KEY:
52        if (headers[i] == ':') {
53          // Found key.
54          key.assign(start, &headers[i] - start);
55          key = NormalizeHeaderKey(key);
56          state = SKIPPING_WHITESPACE;
57        }
58        break;
59
60      case SKIPPING_WHITESPACE:
61        if (headers[i] == ' ') {
62          // Found whitespace, keep going...
63          break;
64        }
65
66        // Found a non-whitespace, mark this as the start of the value.
67        start = &headers[i];
68        state = FINDING_VALUE;
69      // Fallthrough to start processing value without incrementing i.
70
71      case FINDING_VALUE:
72        if (headers[i] == '\n') {
73          // Found value.
74          value.assign(start, &headers[i] - start);
75          result[key] = value;
76          start = &headers[i + 1];
77          state = FINDING_KEY;
78        }
79        break;
80    }
81  }
82
83  return result;
84}
85
86bool ParseContentLength(const StringMap_t& headers, off_t* content_length) {
87  StringMap_t::const_iterator iter = headers.find("Content-Length");
88  if (iter == headers.end())
89    return false;
90
91  *content_length = strtoull(iter->second.c_str(), NULL, 10);
92  return true;
93}
94
95bool ParseContentRange(const StringMap_t& headers,
96                       off_t* read_start,
97                       off_t* read_end,
98                       off_t* entity_length) {
99  StringMap_t::const_iterator iter = headers.find("Content-Range");
100  if (iter == headers.end())
101    return false;
102
103  // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last
104  // value is the entity length, which can potentially be * (i.e. unknown).
105  off_t read_start_int;
106  off_t read_end_int;
107  off_t entity_length_int;
108  int result = sscanf(iter->second.c_str(),
109                      "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64,
110                      &read_start_int,
111                      &read_end_int,
112                      &entity_length_int);
113
114  // The Content-Range header specifies an inclusive range: e.g. the first ten
115  // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing
116  // read_end.
117  if (result == 2) {
118    if (read_start)
119      *read_start = read_start_int;
120    if (read_end)
121      *read_end = read_end_int + 1;
122    if (entity_length)
123      *entity_length = 0;
124    return true;
125  } else if (result == 3) {
126    if (read_start)
127      *read_start = read_start_int;
128    if (read_end)
129      *read_end = read_end_int + 1;
130    if (entity_length)
131      *entity_length = entity_length_int;
132    return true;
133  }
134
135  return false;
136}
137
138// Maps an HTTP |status_code| onto the appropriate errno code.
139int HTTPStatusCodeToErrno(int status_code) {
140  switch (status_code) {
141    case STATUSCODE_OK:
142    case STATUSCODE_PARTIAL_CONTENT:
143      return 0;
144    case STATUSCODE_FORBIDDEN:
145      return EACCES;
146    case STATUSCODE_NOT_FOUND:
147      return ENOENT;
148  }
149  if (status_code >= 400 && status_code < 500)
150    return EINVAL;
151  return EIO;
152}
153
154}  // namespace
155
156void HttpFsNode::SetCachedSize(off_t size) {
157  has_cached_size_ = true;
158  stat_.st_size = size;
159}
160
161Error HttpFsNode::FSync() {
162  return EACCES;
163}
164
165Error HttpFsNode::GetDents(size_t offs,
166                           struct dirent* pdir,
167                           size_t count,
168                           int* out_bytes) {
169  *out_bytes = 0;
170  return EACCES;
171}
172
173Error HttpFsNode::GetStat(struct stat* stat) {
174  AUTO_LOCK(node_lock_);
175  return GetStat_Locked(stat);
176}
177
178Error HttpFsNode::Read(const HandleAttr& attr,
179                       void* buf,
180                       size_t count,
181                       int* out_bytes) {
182  *out_bytes = 0;
183
184  AUTO_LOCK(node_lock_);
185  if (cache_content_) {
186    if (cached_data_.empty()) {
187      Error error = DownloadToCache();
188      if (error)
189        return error;
190    }
191
192    return ReadPartialFromCache(attr, buf, count, out_bytes);
193  }
194
195  return DownloadPartial(attr, buf, count, out_bytes);
196}
197
198Error HttpFsNode::FTruncate(off_t size) {
199  return EACCES;
200}
201
202Error HttpFsNode::Write(const HandleAttr& attr,
203                        const void* buf,
204                        size_t count,
205                        int* out_bytes) {
206  // TODO(binji): support POST?
207  *out_bytes = 0;
208  return EACCES;
209}
210
211Error HttpFsNode::GetSize(off_t* out_size) {
212  *out_size = 0;
213
214  // TODO(binji): This value should be cached properly; i.e. obey the caching
215  // headers returned by the server.
216  AUTO_LOCK(node_lock_);
217  struct stat statbuf;
218  Error error = GetStat_Locked(&statbuf);
219  if (error)
220    return error;
221
222  *out_size = stat_.st_size;
223  return 0;
224}
225
226HttpFsNode::HttpFsNode(Filesystem* filesystem,
227                       const std::string& url,
228                       bool cache_content)
229    : Node(filesystem),
230      url_(url),
231      buffer_(NULL),
232      buffer_len_(0),
233      cache_content_(cache_content),
234      has_cached_size_(false) {
235  // http nodes are read-only by default
236  SetMode(S_IRALL);
237}
238
239HttpFsNode::~HttpFsNode() {
240  free(buffer_);
241}
242
243Error HttpFsNode::GetStat_Locked(struct stat* stat) {
244  // Assume we need to 'HEAD' if we do not know the size, otherwise, assume
245  // that the information is constant.  We can add a timeout if needed.
246  HttpFs* filesystem = static_cast<HttpFs*>(filesystem_);
247  if (!has_cached_size_ || !filesystem->cache_stat_) {
248    StringMap_t headers;
249    ScopedResource loader(filesystem_->ppapi());
250    ScopedResource request(filesystem_->ppapi());
251    ScopedResource response(filesystem_->ppapi());
252    int32_t statuscode;
253    StringMap_t response_headers;
254    const char* method = "HEAD";
255
256    if (filesystem->is_blob_url_) {
257      // Blob URLs do not support HEAD requests, but do give the content length
258      // in their response headers. We issue a single-byte GET request to
259      // retrieve the content length.
260      method = "GET";
261      headers["Range"] = "bytes=0-0";
262    }
263
264    Error error = OpenUrl(method,
265                          &headers,
266                          &loader,
267                          &request,
268                          &response,
269                          &statuscode,
270                          &response_headers);
271    if (error)
272      return error;
273
274    off_t entity_length;
275    if (ParseContentRange(response_headers, NULL, NULL, &entity_length)) {
276      SetCachedSize(static_cast<off_t>(entity_length));
277    } else if (ParseContentLength(response_headers, &entity_length)) {
278      SetCachedSize(static_cast<off_t>(entity_length));
279    } else if (cache_content_) {
280      // The server didn't give a content length; download the data to memory
281      // via DownloadToCache, which will also set stat_.st_size;
282      error = DownloadToCache();
283      if (error)
284        return error;
285    } else {
286      // The user doesn't want to cache content, but we didn't get a
287      // "Content-Length" header. Read the entire entity, and throw it away.
288      // Don't use DownloadToCache, as that will still allocate enough memory
289      // for the entire entity.
290      off_t bytes_read;
291      error = DownloadToTemp(&bytes_read);
292      if (error)
293        return error;
294
295      SetCachedSize(bytes_read);
296    }
297
298    stat_.st_atime = 0;  // TODO(binji): Use "Last-Modified".
299    stat_.st_mtime = 0;
300    stat_.st_ctime = 0;
301
302    SetType(S_IFREG);
303  }
304
305  // Fill the stat structure if provided
306  if (stat)
307    *stat = stat_;
308
309  return 0;
310}
311
312Error HttpFsNode::OpenUrl(const char* method,
313                          StringMap_t* request_headers,
314                          ScopedResource* out_loader,
315                          ScopedResource* out_request,
316                          ScopedResource* out_response,
317                          int32_t* out_statuscode,
318                          StringMap_t* out_response_headers) {
319  // Clear all out parameters.
320  *out_statuscode = 0;
321  out_response_headers->clear();
322
323  // Assume lock_ is already held.
324  PepperInterface* ppapi = filesystem_->ppapi();
325
326  HttpFs* mount_http = static_cast<HttpFs*>(filesystem_);
327  out_request->Reset(
328      mount_http->MakeUrlRequestInfo(url_, method, request_headers));
329  if (!out_request->pp_resource())
330    return EINVAL;
331
332  URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
333  URLResponseInfoInterface* response_interface =
334      ppapi->GetURLResponseInfoInterface();
335  VarInterface* var_interface = ppapi->GetVarInterface();
336
337  out_loader->Reset(loader_interface->Create(ppapi->GetInstance()));
338  if (!out_loader->pp_resource())
339    return EINVAL;
340
341  int32_t result = loader_interface->Open(out_loader->pp_resource(),
342                                          out_request->pp_resource(),
343                                          PP_BlockUntilComplete());
344  if (result != PP_OK)
345    return PPErrorToErrno(result);
346
347  out_response->Reset(
348      loader_interface->GetResponseInfo(out_loader->pp_resource()));
349  if (!out_response->pp_resource())
350    return EINVAL;
351
352  // Get response statuscode.
353  PP_Var statuscode = response_interface->GetProperty(
354      out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE);
355
356  if (statuscode.type != PP_VARTYPE_INT32)
357    return EINVAL;
358
359  *out_statuscode = statuscode.value.as_int;
360
361  // Only accept OK or Partial Content.
362  Error error = HTTPStatusCodeToErrno(*out_statuscode);
363  if (error)
364    return error;
365
366  // Get response headers.
367  PP_Var response_headers_var = response_interface->GetProperty(
368      out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS);
369
370  uint32_t response_headers_length;
371  const char* response_headers_str =
372      var_interface->VarToUtf8(response_headers_var, &response_headers_length);
373
374  *out_response_headers =
375      ParseHeaders(response_headers_str, response_headers_length);
376
377  var_interface->Release(response_headers_var);
378
379  return 0;
380}
381
382Error HttpFsNode::DownloadToCache() {
383  StringMap_t headers;
384  ScopedResource loader(filesystem_->ppapi());
385  ScopedResource request(filesystem_->ppapi());
386  ScopedResource response(filesystem_->ppapi());
387  int32_t statuscode;
388  StringMap_t response_headers;
389  Error error = OpenUrl("GET",
390                        &headers,
391                        &loader,
392                        &request,
393                        &response,
394                        &statuscode,
395                        &response_headers);
396  if (error)
397    return error;
398
399  off_t content_length = 0;
400  if (ParseContentLength(response_headers, &content_length)) {
401    cached_data_.resize(content_length);
402    int real_size;
403    error = ReadResponseToBuffer(
404        loader, cached_data_.data(), content_length, &real_size);
405    if (error)
406      return error;
407
408    SetCachedSize(real_size);
409    cached_data_.resize(real_size);
410    return 0;
411  }
412
413  int bytes_read;
414  error = ReadEntireResponseToCache(loader, &bytes_read);
415  if (error)
416    return error;
417
418  SetCachedSize(bytes_read);
419  return 0;
420}
421
422Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr,
423                                       void* buf,
424                                       int count,
425                                       int* out_bytes) {
426  *out_bytes = 0;
427  off_t size = cached_data_.size();
428
429  if (attr.offs + count > size)
430    count = size - attr.offs;
431
432  if (count <= 0)
433    return 0;
434
435  memcpy(buf, &cached_data_.data()[attr.offs], count);
436  *out_bytes = count;
437  return 0;
438}
439
440Error HttpFsNode::DownloadPartial(const HandleAttr& attr,
441                                  void* buf,
442                                  off_t count,
443                                  int* out_bytes) {
444  *out_bytes = 0;
445
446  StringMap_t headers;
447
448  char buffer[100];
449  // Range request is inclusive: 0-99 returns 100 bytes.
450  snprintf(&buffer[0],
451           sizeof(buffer),
452           "bytes=%" PRIi64 "-%" PRIi64,
453           attr.offs,
454           attr.offs + count - 1);
455  headers["Range"] = buffer;
456
457  ScopedResource loader(filesystem_->ppapi());
458  ScopedResource request(filesystem_->ppapi());
459  ScopedResource response(filesystem_->ppapi());
460  int32_t statuscode;
461  StringMap_t response_headers;
462  Error error = OpenUrl("GET",
463                        &headers,
464                        &loader,
465                        &request,
466                        &response,
467                        &statuscode,
468                        &response_headers);
469  if (error) {
470    if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) {
471      // We're likely trying to read past the end. Return 0 bytes.
472      *out_bytes = 0;
473      return 0;
474    }
475
476    return error;
477  }
478
479  off_t read_start = 0;
480  if (statuscode == STATUSCODE_OK) {
481    // No partial result, read everything starting from the part we care about.
482    off_t content_length;
483    if (ParseContentLength(response_headers, &content_length)) {
484      if (attr.offs >= content_length)
485        return EINVAL;
486
487      // Clamp count, if trying to read past the end of the file.
488      if (attr.offs + count > content_length) {
489        count = content_length - attr.offs;
490      }
491    }
492  } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) {
493    // Determine from the headers where we are reading.
494    off_t read_end;
495    off_t entity_length;
496    if (ParseContentRange(
497            response_headers, &read_start, &read_end, &entity_length)) {
498      if (read_start > attr.offs || read_start > read_end) {
499        // If this error occurs, the server is returning bogus values.
500        return EINVAL;
501      }
502
503      // Clamp count, if trying to read past the end of the file.
504      count = std::min(read_end - read_start, count);
505    } else {
506      // Partial Content without Content-Range. Assume that the server gave us
507      // exactly what we asked for. This can happen even when the server
508      // returns 200 -- the cache may return 206 in this case, but not modify
509      // the headers.
510      read_start = attr.offs;
511    }
512  }
513
514  if (read_start < attr.offs) {
515    // We aren't yet at the location where we want to start reading. Read into
516    // our dummy buffer until then.
517    int bytes_to_read = attr.offs - read_start;
518    int bytes_read;
519    error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read);
520    if (error)
521      return error;
522
523    // Tried to read past the end of the entity.
524    if (bytes_read < bytes_to_read) {
525      *out_bytes = 0;
526      return 0;
527    }
528  }
529
530  return ReadResponseToBuffer(loader, buf, count, out_bytes);
531}
532
533Error HttpFsNode::DownloadToTemp(off_t* out_bytes) {
534  StringMap_t headers;
535  ScopedResource loader(filesystem_->ppapi());
536  ScopedResource request(filesystem_->ppapi());
537  ScopedResource response(filesystem_->ppapi());
538  int32_t statuscode;
539  StringMap_t response_headers;
540  Error error = OpenUrl("GET",
541                        &headers,
542                        &loader,
543                        &request,
544                        &response,
545                        &statuscode,
546                        &response_headers);
547  if (error)
548    return error;
549
550  off_t content_length = 0;
551  if (ParseContentLength(response_headers, &content_length)) {
552    *out_bytes = content_length;
553    return 0;
554  }
555
556  return ReadEntireResponseToTemp(loader, out_bytes);
557}
558
559Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader,
560                                           off_t* out_bytes) {
561  *out_bytes = 0;
562
563  const int kBytesToRead = MAX_READ_BUFFER_SIZE;
564  buffer_ = (char*)realloc(buffer_, kBytesToRead);
565  assert(buffer_);
566  if (!buffer_) {
567    buffer_len_ = 0;
568    return ENOMEM;
569  }
570  buffer_len_ = kBytesToRead;
571
572  while (true) {
573    int bytes_read;
574    Error error =
575        ReadResponseToBuffer(loader, buffer_, kBytesToRead, &bytes_read);
576    if (error)
577      return error;
578
579    *out_bytes += bytes_read;
580
581    if (bytes_read < kBytesToRead)
582      return 0;
583  }
584}
585
586Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader,
587                                            int* out_bytes) {
588  *out_bytes = 0;
589  const int kBytesToRead = MAX_READ_BUFFER_SIZE;
590
591  while (true) {
592    // Always recalculate the buf pointer because it may have moved when
593    // cached_data_ was resized.
594    cached_data_.resize(*out_bytes + kBytesToRead);
595    void* buf = cached_data_.data() + *out_bytes;
596
597    int bytes_read;
598    Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read);
599    if (error)
600      return error;
601
602    *out_bytes += bytes_read;
603
604    if (bytes_read < kBytesToRead) {
605      // Shrink the cached data buffer to the correct size.
606      cached_data_.resize(*out_bytes);
607      return 0;
608    }
609  }
610}
611
612Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader,
613                                     int count,
614                                     int* out_bytes) {
615  *out_bytes = 0;
616
617  if (buffer_len_ < count) {
618    int new_len = std::min(count, MAX_READ_BUFFER_SIZE);
619    buffer_ = (char*)realloc(buffer_, new_len);
620    assert(buffer_);
621    if (!buffer_) {
622      buffer_len_ = 0;
623      return ENOMEM;
624    }
625    buffer_len_ = new_len;
626  }
627
628  int bytes_left = count;
629  while (bytes_left > 0) {
630    int bytes_to_read = std::min(bytes_left, buffer_len_);
631    int bytes_read;
632    Error error = ReadResponseToBuffer(
633        loader, buffer_, bytes_to_read, &bytes_read);
634    if (error)
635      return error;
636
637    if (bytes_read == 0)
638      return 0;
639
640    bytes_left -= bytes_read;
641    *out_bytes += bytes_read;
642  }
643
644  return 0;
645}
646
647Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader,
648                                       void* buf,
649                                       int count,
650                                       int* out_bytes) {
651  *out_bytes = 0;
652
653  PepperInterface* ppapi = filesystem_->ppapi();
654  URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
655
656  char* out_buffer = static_cast<char*>(buf);
657  int bytes_to_read = count;
658  while (bytes_to_read > 0) {
659    int bytes_read =
660        loader_interface->ReadResponseBody(loader.pp_resource(),
661                                           out_buffer,
662                                           bytes_to_read,
663                                           PP_BlockUntilComplete());
664
665    if (bytes_read == 0) {
666      // This is not an error -- it may just be that we were trying to read
667      // more data than exists.
668      *out_bytes = count - bytes_to_read;
669      return 0;
670    }
671
672    if (bytes_read < 0)
673      return PPErrorToErrno(bytes_read);
674
675    assert(bytes_read <= bytes_to_read);
676    bytes_to_read -= bytes_read;
677    out_buffer += bytes_read;
678  }
679
680  *out_bytes = count;
681  return 0;
682}
683
684}  // namespace nacl_io
685