manifest_parser.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Copyright (C) 2008 Apple Inc. All Rights Reserved.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * modification, are permitted provided that the following conditions
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * are met:
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *    documentation and/or other materials provided with the distribution.
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
32868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "webkit/browser/appcache/manifest_parser.h"
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/command_line.h"
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
37868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
38eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/gurl.h"
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace appcache {
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace {
43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// Reports whether the text in [line_p, line_end) carries the 'isPattern'
// annotation: after any leading spaces/tabs, the remaining characters must be
// exactly "isPattern". Returns false when nothing but blanks remains.
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  // Step over the blanks that separate the resource url from the annotation.
  // The caller has already trimmed trailing whitespace from the line, so
  // whatever follows the blanks runs up to line_end.
  const wchar_t* cursor = line_p;
  for (; cursor < line_end; ++cursor) {
    if (*cursor != ' ' && *cursor != '\t')
      break;
  }

  // Only blanks (or nothing at all) after the url: there is no annotation.
  if (cursor == line_end)
    return false;

  const std::wstring remainder(cursor, line_end - cursor);
  return remainder.compare(L"isPattern") == 0;
}
56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// Manifest section the parser is currently reading. ParseManifest() starts
// in EXPLICIT and switches whenever it meets a section header line.
enum Mode {
  // Entered on a "CACHE:" header (also the initial mode).
  EXPLICIT,
  // Entered on a "CHROMIUM-INTERCEPT:" header.
  INTERCEPT,
  // Entered on a "FALLBACK:" header.
  FALLBACK,
  // Entered on a "NETWORK:" header.
  ONLINE_WHITELIST,
  // Entered on any other line ending in ':'; lines in this mode are skipped.
  UNKNOWN_MODE,
};
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// Intercept type token found on a line of the CHROMIUM-INTERCEPT section
// (<urlnamespace> <intercept_type> <targeturl>).
enum InterceptVerb {
  // The token was "return".
  RETURN,
  // The token was "execute" and the kEnableExecutableHandlers switch is set.
  EXECUTE,
  // Anything else; ParseManifest() skips such lines.
  UNKNOWN_VERB,
};
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Manifest::Manifest() : online_whitelist_all(false) {}
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Manifest::~Manifest() {}
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ParseManifest(const GURL& manifest_url, const char* data, int length,
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   Manifest& manifest) {
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is an implementation of the parsing algorithm specified in
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the HTML5 offline web application docs:
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   http://www.w3.org/TR/html5/offline.html
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Do not modify it without consulting those docs.
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Though you might be tempted to convert these wstrings to UTF-8 or
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // base::string16, this implementation seems simpler given the constraints.
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t kSignature[] = L"CACHE MANIFEST";
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kSignatureLength = arraysize(kSignature) - 1;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST";
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1;
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(manifest.explicit_urls.empty());
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(manifest.fallback_namespaces.empty());
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(manifest.online_whitelist_namespaces.empty());
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(!manifest.online_whitelist_all);
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Mode mode = EXPLICIT;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::wstring data_string;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // until UTF8ToWide uses 0xFFFD Unicode replacement character.
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::CodepageToWide(std::string(data, length), base::kCodepageUTF8,
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                       base::OnStringConversionError::SUBSTITUTE, &data_string);
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t* p = data_string.c_str();
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t* end = p + data_string.length();
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?"
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Example: "CACHE MANIFEST #comment" is a valid signature.
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Example: "CACHE MANIFEST;V2" is not.
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // When the input data starts with a UTF-8 Byte-Order-Mark
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int bom_offset = 0;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!data_string.empty() && data_string[0] == 0xFEFF) {
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bom_offset = 1;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++p;
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p >= end)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check for a supported signature and skip p past it.
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (0 == data_string.compare(bom_offset, kSignatureLength,
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               kSignature)) {
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    p += kSignatureLength;
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength,
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      kChromiumSignature)) {
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    p += kChromiumSignatureLength;
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Character after "CACHE MANIFEST" must be whitespace.
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Skip to the end of the line.
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (p < end && *p != '\r' && *p != '\n')
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++p;
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (1) {
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Skip whitespace
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++p;
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (p == end)
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const wchar_t* line_start = p;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Find the end of the line
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (p < end && *p != '\r' && *p != '\n')
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++p;
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Check if we have a comment
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*line_start == '#')
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Get rid of trailing whitespace
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const wchar_t* tmp = p - 1;
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (tmp > line_start && (*tmp == ' ' || *tmp == '\t'))
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      --tmp;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::wstring line(line_start, tmp - line_start + 1);
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (line == L"CACHE:") {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = EXPLICIT;
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"FALLBACK:") {
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = FALLBACK;
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"NETWORK:") {
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = ONLINE_WHITELIST;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"CHROMIUM-INTERCEPT:") {
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = INTERCEPT;
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (*(line.end() - 1) == ':') {
175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      mode = UNKNOWN_MODE;
176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    } else if (mode == UNKNOWN_MODE) {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"*" && mode == ONLINE_WHITELIST) {
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      manifest.online_whitelist_all = true;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) {
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t *line_p = line.c_str();
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t *line_end = line_p + line.length();
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the URL from subsequent ignored tokens.
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
189c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 url16;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WideToUTF16(line.c_str(), line_p - line.c_str(), &url16);
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL url = manifest_url.Resolve(url16);
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!url.is_valid())
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (url.has_ref()) {
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        url = url.ReplaceComponents(replacements);
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Scheme component must be the same as the manifest URL's.
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (url.scheme() != manifest_url.scheme()) {
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // See http://code.google.com/p/chromium/issues/detail?id=69594
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We willfully violate the HTML5 spec at this point in order
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // to support the appcaching of cross-origin HTTPS resources.
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Per the spec, EXPLICIT cross-origin HTTPS resources should be
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // ignored here. We've opted for a milder constraint and allow
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // caching unless the resource has a "no-store" header. That
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // condition is enforced in AppCacheUpdateJob.
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (mode == EXPLICIT) {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        manifest.explicit_urls.insert(url.spec());
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        manifest.online_whitelist_namespaces.push_back(
218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            Namespace(NETWORK_NAMESPACE, url, GURL(), is_pattern));
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (mode == INTERCEPT) {
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Lines of the form,
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // <urlnamespace> <intercept_type> <targeturl>
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_p = line.c_str();
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_end = line_p + line.length();
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for first whitespace separating the url namespace from
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // the intercept type.
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (line_p == line_end)
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;  // There was no whitespace separating the URLs.
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
234c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 namespace_url16;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL namespace_url = manifest_url.Resolve(namespace_url16);
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!namespace_url.is_valid())
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (namespace_url.has_ref()) {
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        namespace_url = namespace_url.ReplaceComponents(replacements);
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // The namespace URL must have the same scheme, host and port
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // as the manifest's URL.
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != namespace_url.GetOrigin())
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip whitespace separating namespace from the type.
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the type from the target url.
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* type_start = line_p;
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for a type value we understand, otherwise skip the line.
260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      InterceptVerb verb = UNKNOWN_VERB;
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      std::wstring type(type_start, line_p - type_start);
262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (type == L"return") {
263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        verb = RETURN;
264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      } else if (type == L"execute" &&
265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                 CommandLine::ForCurrentProcess()->HasSwitch(
266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    kEnableExecutableHandlers)) {
267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        verb = EXECUTE;
268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      }
269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (verb == UNKNOWN_VERB)
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip whitespace separating type from the target_url.
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the URL from subsequent ignored tokens.
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* target_url_start = line_p;
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
281c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 target_url16;
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WideToUTF16(target_url_start, line_p - target_url_start, &target_url16);
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL target_url = manifest_url.Resolve(target_url16);
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!target_url.is_valid())
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (target_url.has_ref()) {
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        target_url = target_url.ReplaceComponents(replacements);
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != target_url.GetOrigin())
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      manifest.intercept_namespaces.push_back(
297c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          Namespace(INTERCEPT_NAMESPACE, namespace_url,
298c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    target_url, is_pattern, verb == EXECUTE));
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (mode == FALLBACK) {
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_p = line.c_str();
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_end = line_p + line.length();
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the two URLs
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (line_p == line_end) {
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // There was no whitespace separating the URLs.
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 namespace_url16;
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL namespace_url = manifest_url.Resolve(namespace_url16);
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!namespace_url.is_valid())
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (namespace_url.has_ref()) {
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        namespace_url = namespace_url.ReplaceComponents(replacements);
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Fallback namespace URL must have the same scheme, host and port
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // as the manifest's URL.
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) {
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip whitespace separating fallback namespace from URL.
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the URL from subsequent ignored tokens.
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* fallback_start = line_p;
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 fallback_url16;
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WideToUTF16(fallback_start, line_p - fallback_start, &fallback_url16);
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL fallback_url = manifest_url.Resolve(fallback_url16);
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!fallback_url.is_valid())
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (fallback_url.has_ref()) {
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        fallback_url = fallback_url.ReplaceComponents(replacements);
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Fallback entry URL must have the same scheme, host and port
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // as the manifest's URL.
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) {
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Store regardless of duplicate namespace URL. Only first match
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // will ever be used.
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      manifest.fallback_namespaces.push_back(
360c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          Namespace(FALLBACK_NAMESPACE, namespace_url,
361c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    fallback_url, is_pattern));
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NOTREACHED();
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace appcache
371