1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "nacl_io/httpfs/http_fs_node.h"
6
7#include <assert.h>
8#include <errno.h>
9#include <stdio.h>
10#include <string.h>
11
12#include <ppapi/c/pp_errors.h>
13
14#include "nacl_io/httpfs/http_fs.h"
15#include "nacl_io/kernel_handle.h"
16#include "nacl_io/osinttypes.h"
17
18#if defined(WIN32)
19#define snprintf _snprintf
20#endif
21
22namespace nacl_io {
23
24namespace {
25
// When a ranged read is requested but the server replies with the complete
// entity, everything in front of the requested offset has to be consumed and
// discarded. That data is drained through a scratch buffer; this is the
// largest size that buffer is allowed to reach.
const int MAX_READ_BUFFER_SIZE = 64 * 1024;

// HTTP status codes that this file handles explicitly.
const int32_t STATUSCODE_OK = 200;
const int32_t STATUSCODE_PARTIAL_CONTENT = 206;
const int32_t STATUSCODE_FORBIDDEN = 403;
const int32_t STATUSCODE_NOT_FOUND = 404;
const int32_t STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE = 416;
35
36StringMap_t ParseHeaders(const char* headers, int32_t headers_length) {
37  enum State {
38    FINDING_KEY,
39    SKIPPING_WHITESPACE,
40    FINDING_VALUE,
41  };
42
43  StringMap_t result;
44  std::string key;
45  std::string value;
46
47  State state = FINDING_KEY;
48  const char* start = headers;
49  for (int i = 0; i < headers_length; ++i) {
50    switch (state) {
51      case FINDING_KEY:
52        if (headers[i] == ':') {
53          // Found key.
54          key.assign(start, &headers[i] - start);
55          key = NormalizeHeaderKey(key);
56          state = SKIPPING_WHITESPACE;
57        }
58        break;
59
60      case SKIPPING_WHITESPACE:
61        if (headers[i] == ' ') {
62          // Found whitespace, keep going...
63          break;
64        }
65
66        // Found a non-whitespace, mark this as the start of the value.
67        start = &headers[i];
68        state = FINDING_VALUE;
69      // Fallthrough to start processing value without incrementing i.
70
71      case FINDING_VALUE:
72        if (headers[i] == '\n') {
73          // Found value.
74          value.assign(start, &headers[i] - start);
75          result[key] = value;
76          start = &headers[i + 1];
77          state = FINDING_KEY;
78        }
79        break;
80    }
81  }
82
83  return result;
84}
85
86bool ParseContentLength(const StringMap_t& headers, off_t* content_length) {
87  StringMap_t::const_iterator iter = headers.find("Content-Length");
88  if (iter == headers.end())
89    return false;
90
91  *content_length = strtoull(iter->second.c_str(), NULL, 10);
92  return true;
93}
94
95bool ParseContentRange(const StringMap_t& headers,
96                       off_t* read_start,
97                       off_t* read_end,
98                       off_t* entity_length) {
99  StringMap_t::const_iterator iter = headers.find("Content-Range");
100  if (iter == headers.end())
101    return false;
102
103  // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last
104  // value is the entity length, which can potentially be * (i.e. unknown).
105  off_t read_start_int;
106  off_t read_end_int;
107  off_t entity_length_int;
108  int result = sscanf(iter->second.c_str(),
109                      "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64,
110                      &read_start_int,
111                      &read_end_int,
112                      &entity_length_int);
113
114  // The Content-Range header specifies an inclusive range: e.g. the first ten
115  // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing
116  // read_end.
117  if (result == 2) {
118    *read_start = read_start_int;
119    *read_end = read_end_int + 1;
120    *entity_length = 0;
121    return true;
122  } else if (result == 3) {
123    *read_start = read_start_int;
124    *read_end = read_end_int + 1;
125    *entity_length = entity_length_int;
126    return true;
127  }
128
129  return false;
130}
131
132// Maps an HTTP |status_code| onto the appropriate errno code.
133int HTTPStatusCodeToErrno(int status_code) {
134  switch (status_code) {
135    case STATUSCODE_OK:
136    case STATUSCODE_PARTIAL_CONTENT:
137      return 0;
138    case STATUSCODE_FORBIDDEN:
139      return EACCES;
140    case STATUSCODE_NOT_FOUND:
141      return ENOENT;
142  }
143  if (status_code >= 400 && status_code < 500)
144    return EINVAL;
145  return EIO;
146}
147
148}  // namespace
149
150void HttpFsNode::SetCachedSize(off_t size) {
151  has_cached_size_ = true;
152  stat_.st_size = size;
153}
154
155Error HttpFsNode::FSync() {
156  return EACCES;
157}
158
159Error HttpFsNode::GetDents(size_t offs,
160                           struct dirent* pdir,
161                           size_t count,
162                           int* out_bytes) {
163  *out_bytes = 0;
164  return EACCES;
165}
166
167Error HttpFsNode::GetStat(struct stat* stat) {
168  AUTO_LOCK(node_lock_);
169  return GetStat_Locked(stat);
170}
171
172Error HttpFsNode::Read(const HandleAttr& attr,
173                       void* buf,
174                       size_t count,
175                       int* out_bytes) {
176  *out_bytes = 0;
177
178  AUTO_LOCK(node_lock_);
179  if (cache_content_) {
180    if (cached_data_.empty()) {
181      Error error = DownloadToCache();
182      if (error)
183        return error;
184    }
185
186    return ReadPartialFromCache(attr, buf, count, out_bytes);
187  }
188
189  return DownloadPartial(attr, buf, count, out_bytes);
190}
191
192Error HttpFsNode::FTruncate(off_t size) {
193  return EACCES;
194}
195
196Error HttpFsNode::Write(const HandleAttr& attr,
197                        const void* buf,
198                        size_t count,
199                        int* out_bytes) {
200  // TODO(binji): support POST?
201  *out_bytes = 0;
202  return EACCES;
203}
204
205Error HttpFsNode::GetSize(off_t* out_size) {
206  *out_size = 0;
207
208  // TODO(binji): This value should be cached properly; i.e. obey the caching
209  // headers returned by the server.
210  AUTO_LOCK(node_lock_);
211  struct stat statbuf;
212  Error error = GetStat_Locked(&statbuf);
213  if (error)
214    return error;
215
216  *out_size = stat_.st_size;
217  return 0;
218}
219
220HttpFsNode::HttpFsNode(Filesystem* filesystem,
221                       const std::string& url,
222                       bool cache_content)
223    : Node(filesystem),
224      url_(url),
225      cache_content_(cache_content),
226      has_cached_size_(false) {
227}
228
229void HttpFsNode::SetMode(int mode) {
230  stat_.st_mode = mode;
231}
232
233Error HttpFsNode::GetStat_Locked(struct stat* stat) {
234  // Assume we need to 'HEAD' if we do not know the size, otherwise, assume
235  // that the information is constant.  We can add a timeout if needed.
236  HttpFs* filesystem = static_cast<HttpFs*>(filesystem_);
237  if (!has_cached_size_ || !filesystem->cache_stat_) {
238    StringMap_t headers;
239    ScopedResource loader(filesystem_->ppapi());
240    ScopedResource request(filesystem_->ppapi());
241    ScopedResource response(filesystem_->ppapi());
242    int32_t statuscode;
243    StringMap_t response_headers;
244    Error error = OpenUrl("HEAD",
245                          &headers,
246                          &loader,
247                          &request,
248                          &response,
249                          &statuscode,
250                          &response_headers);
251    if (error)
252      return error;
253
254    off_t entity_length;
255    if (ParseContentLength(response_headers, &entity_length)) {
256      SetCachedSize(static_cast<off_t>(entity_length));
257    } else if (cache_content_) {
258      // The server didn't give a content length; download the data to memory
259      // via DownloadToCache, which will also set stat_.st_size;
260      error = DownloadToCache();
261      if (error)
262        return error;
263    } else {
264      // The user doesn't want to cache content, but we didn't get a
265      // "Content-Length" header. Read the entire entity, and throw it away.
266      // Don't use DownloadToCache, as that will still allocate enough memory
267      // for the entire entity.
268      off_t bytes_read;
269      error = DownloadToTemp(&bytes_read);
270      if (error)
271        return error;
272
273      SetCachedSize(bytes_read);
274    }
275
276    stat_.st_atime = 0;  // TODO(binji): Use "Last-Modified".
277    stat_.st_mtime = 0;
278    stat_.st_ctime = 0;
279
280    stat_.st_mode |= S_IFREG;
281  }
282
283  // Fill the stat structure if provided
284  if (stat)
285    *stat = stat_;
286
287  return 0;
288}
289
290Error HttpFsNode::OpenUrl(const char* method,
291                          StringMap_t* request_headers,
292                          ScopedResource* out_loader,
293                          ScopedResource* out_request,
294                          ScopedResource* out_response,
295                          int32_t* out_statuscode,
296                          StringMap_t* out_response_headers) {
297  // Clear all out parameters.
298  *out_statuscode = 0;
299  out_response_headers->clear();
300
301  // Assume lock_ is already held.
302  PepperInterface* ppapi = filesystem_->ppapi();
303
304  HttpFs* mount_http = static_cast<HttpFs*>(filesystem_);
305  out_request->Reset(
306      mount_http->MakeUrlRequestInfo(url_, method, request_headers));
307  if (!out_request->pp_resource())
308    return EINVAL;
309
310  URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
311  URLResponseInfoInterface* response_interface =
312      ppapi->GetURLResponseInfoInterface();
313  VarInterface* var_interface = ppapi->GetVarInterface();
314
315  out_loader->Reset(loader_interface->Create(ppapi->GetInstance()));
316  if (!out_loader->pp_resource())
317    return EINVAL;
318
319  int32_t result = loader_interface->Open(out_loader->pp_resource(),
320                                          out_request->pp_resource(),
321                                          PP_BlockUntilComplete());
322  if (result != PP_OK)
323    return PPErrorToErrno(result);
324
325  out_response->Reset(
326      loader_interface->GetResponseInfo(out_loader->pp_resource()));
327  if (!out_response->pp_resource())
328    return EINVAL;
329
330  // Get response statuscode.
331  PP_Var statuscode = response_interface->GetProperty(
332      out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE);
333
334  if (statuscode.type != PP_VARTYPE_INT32)
335    return EINVAL;
336
337  *out_statuscode = statuscode.value.as_int;
338
339  // Only accept OK or Partial Content.
340  Error error = HTTPStatusCodeToErrno(*out_statuscode);
341  if (error)
342    return error;
343
344  // Get response headers.
345  PP_Var response_headers_var = response_interface->GetProperty(
346      out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS);
347
348  uint32_t response_headers_length;
349  const char* response_headers_str =
350      var_interface->VarToUtf8(response_headers_var, &response_headers_length);
351
352  *out_response_headers =
353      ParseHeaders(response_headers_str, response_headers_length);
354
355  var_interface->Release(response_headers_var);
356
357  return 0;
358}
359
360Error HttpFsNode::DownloadToCache() {
361  StringMap_t headers;
362  ScopedResource loader(filesystem_->ppapi());
363  ScopedResource request(filesystem_->ppapi());
364  ScopedResource response(filesystem_->ppapi());
365  int32_t statuscode;
366  StringMap_t response_headers;
367  Error error = OpenUrl("GET",
368                        &headers,
369                        &loader,
370                        &request,
371                        &response,
372                        &statuscode,
373                        &response_headers);
374  if (error)
375    return error;
376
377  off_t content_length = 0;
378  if (ParseContentLength(response_headers, &content_length)) {
379    cached_data_.resize(content_length);
380    int real_size;
381    error = ReadResponseToBuffer(
382        loader, cached_data_.data(), content_length, &real_size);
383    if (error)
384      return error;
385
386    SetCachedSize(real_size);
387    cached_data_.resize(real_size);
388    return 0;
389  }
390
391  int bytes_read;
392  error = ReadEntireResponseToCache(loader, &bytes_read);
393  if (error)
394    return error;
395
396  SetCachedSize(bytes_read);
397  return 0;
398}
399
400Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr,
401                                       void* buf,
402                                       int count,
403                                       int* out_bytes) {
404  *out_bytes = 0;
405  off_t size = cached_data_.size();
406
407  if (attr.offs + count > size)
408    count = size - attr.offs;
409
410  if (count <= 0)
411    return 0;
412
413  memcpy(buf, &cached_data_.data()[attr.offs], count);
414  *out_bytes = count;
415  return 0;
416}
417
418Error HttpFsNode::DownloadPartial(const HandleAttr& attr,
419                                  void* buf,
420                                  off_t count,
421                                  int* out_bytes) {
422  *out_bytes = 0;
423
424  StringMap_t headers;
425
426  char buffer[100];
427  // Range request is inclusive: 0-99 returns 100 bytes.
428  snprintf(&buffer[0],
429           sizeof(buffer),
430           "bytes=%" PRIi64 "-%" PRIi64,
431           attr.offs,
432           attr.offs + count - 1);
433  headers["Range"] = buffer;
434
435  ScopedResource loader(filesystem_->ppapi());
436  ScopedResource request(filesystem_->ppapi());
437  ScopedResource response(filesystem_->ppapi());
438  int32_t statuscode;
439  StringMap_t response_headers;
440  Error error = OpenUrl("GET",
441                        &headers,
442                        &loader,
443                        &request,
444                        &response,
445                        &statuscode,
446                        &response_headers);
447  if (error) {
448    if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) {
449      // We're likely trying to read past the end. Return 0 bytes.
450      *out_bytes = 0;
451      return 0;
452    }
453
454    return error;
455  }
456
457  off_t read_start = 0;
458  if (statuscode == STATUSCODE_OK) {
459    // No partial result, read everything starting from the part we care about.
460    off_t content_length;
461    if (ParseContentLength(response_headers, &content_length)) {
462      if (attr.offs >= content_length)
463        return EINVAL;
464
465      // Clamp count, if trying to read past the end of the file.
466      if (attr.offs + count > content_length) {
467        count = content_length - attr.offs;
468      }
469    }
470  } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) {
471    // Determine from the headers where we are reading.
472    off_t read_end;
473    off_t entity_length;
474    if (ParseContentRange(
475            response_headers, &read_start, &read_end, &entity_length)) {
476      if (read_start > attr.offs || read_start > read_end) {
477        // If this error occurs, the server is returning bogus values.
478        return EINVAL;
479      }
480
481      // Clamp count, if trying to read past the end of the file.
482      count = std::min(read_end - read_start, count);
483    } else {
484      // Partial Content without Content-Range. Assume that the server gave us
485      // exactly what we asked for. This can happen even when the server
486      // returns 200 -- the cache may return 206 in this case, but not modify
487      // the headers.
488      read_start = attr.offs;
489    }
490  }
491
492  if (read_start < attr.offs) {
493    // We aren't yet at the location where we want to start reading. Read into
494    // our dummy buffer until then.
495    int bytes_to_read = attr.offs - read_start;
496    int bytes_read;
497    error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read);
498    if (error)
499      return error;
500
501    // Tried to read past the end of the entity.
502    if (bytes_read < bytes_to_read) {
503      *out_bytes = 0;
504      return 0;
505    }
506  }
507
508  return ReadResponseToBuffer(loader, buf, count, out_bytes);
509}
510
511Error HttpFsNode::DownloadToTemp(off_t* out_bytes) {
512  StringMap_t headers;
513  ScopedResource loader(filesystem_->ppapi());
514  ScopedResource request(filesystem_->ppapi());
515  ScopedResource response(filesystem_->ppapi());
516  int32_t statuscode;
517  StringMap_t response_headers;
518  Error error = OpenUrl("GET",
519                        &headers,
520                        &loader,
521                        &request,
522                        &response,
523                        &statuscode,
524                        &response_headers);
525  if (error)
526    return error;
527
528  off_t content_length = 0;
529  if (ParseContentLength(response_headers, &content_length)) {
530    *out_bytes = content_length;
531    return 0;
532  }
533
534  return ReadEntireResponseToTemp(loader, out_bytes);
535}
536
537Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader,
538                                           off_t* out_bytes) {
539  *out_bytes = 0;
540
541  const int kBytesToRead = MAX_READ_BUFFER_SIZE;
542  buffer_.resize(kBytesToRead);
543
544  while (true) {
545    int bytes_read;
546    Error error =
547        ReadResponseToBuffer(loader, buffer_.data(), kBytesToRead, &bytes_read);
548    if (error)
549      return error;
550
551    *out_bytes += bytes_read;
552
553    if (bytes_read < kBytesToRead)
554      return 0;
555  }
556}
557
558Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader,
559                                            int* out_bytes) {
560  *out_bytes = 0;
561  const int kBytesToRead = MAX_READ_BUFFER_SIZE;
562
563  while (true) {
564    // Always recalculate the buf pointer because it may have moved when
565    // cached_data_ was resized.
566    cached_data_.resize(*out_bytes + kBytesToRead);
567    void* buf = cached_data_.data() + *out_bytes;
568
569    int bytes_read;
570    Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read);
571    if (error)
572      return error;
573
574    *out_bytes += bytes_read;
575
576    if (bytes_read < kBytesToRead) {
577      // Shrink the cached data buffer to the correct size.
578      cached_data_.resize(*out_bytes);
579      return 0;
580    }
581  }
582}
583
584Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader,
585                                     int count,
586                                     int* out_bytes) {
587  *out_bytes = 0;
588
589  if (buffer_.size() < static_cast<size_t>(count))
590    buffer_.resize(std::min(count, MAX_READ_BUFFER_SIZE));
591
592  int bytes_left = count;
593  while (bytes_left > 0) {
594    int bytes_to_read =
595        std::min(static_cast<size_t>(bytes_left), buffer_.size());
596    int bytes_read;
597    Error error = ReadResponseToBuffer(
598        loader, buffer_.data(), bytes_to_read, &bytes_read);
599    if (error)
600      return error;
601
602    if (bytes_read == 0)
603      return 0;
604
605    bytes_left -= bytes_read;
606    *out_bytes += bytes_read;
607  }
608
609  return 0;
610}
611
612Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader,
613                                       void* buf,
614                                       int count,
615                                       int* out_bytes) {
616  *out_bytes = 0;
617
618  PepperInterface* ppapi = filesystem_->ppapi();
619  URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
620
621  char* out_buffer = static_cast<char*>(buf);
622  int bytes_to_read = count;
623  while (bytes_to_read > 0) {
624    int bytes_read =
625        loader_interface->ReadResponseBody(loader.pp_resource(),
626                                           out_buffer,
627                                           bytes_to_read,
628                                           PP_BlockUntilComplete());
629
630    if (bytes_read == 0) {
631      // This is not an error -- it may just be that we were trying to read
632      // more data than exists.
633      *out_bytes = count - bytes_to_read;
634      return 0;
635    }
636
637    if (bytes_read < 0)
638      return PPErrorToErrno(bytes_read);
639
640    assert(bytes_read <= bytes_to_read);
641    bytes_to_read -= bytes_read;
642    out_buffer += bytes_read;
643  }
644
645  *out_bytes = count;
646  return 0;
647}
648
649}  // namespace nacl_io
650