15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Copyright (C) 2008 Apple Inc. All Rights Reserved.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * modification, are permitted provided that the following conditions
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * are met:
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *    documentation and/or other materials provided with the distribution.
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
32868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "webkit/browser/appcache/manifest_parser.h"
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/command_line.h"
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
37868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
38eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/gurl.h"
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace appcache {
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
namespace {

// Reports whether the text that follows a resource url on a manifest line
// is exactly the 'isPattern' annotation.
//
// |line_p| points just past the url and |line_end| is one past the last
// character of the line. Tabs and spaces in front of the annotation are
// skipped. Nothing is skipped after it: the caller is expected to have
// trimmed trailing whitespace already, so any extra characters make the
// comparison fail. Returns false when nothing but whitespace remains.
bool HasPatternMatchingAnnotation(const wchar_t* line_p,
                                  const wchar_t* line_end) {
  static const wchar_t kPatternAnnotation[] = L"isPattern";

  // Advance over the separator between the url and the annotation.
  const wchar_t* cursor = line_p;
  for (; cursor < line_end; ++cursor) {
    if (*cursor != L'\t' && *cursor != L' ')
      break;
  }

  // Only whitespace (or nothing at all) followed the url.
  if (cursor == line_end)
    return false;

  const std::wstring remainder(cursor, line_end - cursor);
  return remainder == kPatternAnnotation;
}

}  // namespace
58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// Manifest section that ParseManifest is currently reading. The section
// changes whenever a header line is encountered.
enum Mode {
  // Initial section, and the one selected by a "CACHE:" header.
  EXPLICIT = 0,
  // Selected by a "CHROMIUM-INTERCEPT:" header.
  INTERCEPT = 1,
  // Selected by a "FALLBACK:" header.
  FALLBACK = 2,
  // Selected by a "NETWORK:" header.
  ONLINE_WHITELIST = 3,
  // Selected by any other line ending in ':'; lines in such a section are
  // skipped.
  UNKNOWN_MODE = 4,
};
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// Action named by the <intercept_type> token of a CHROMIUM-INTERCEPT entry.
enum InterceptVerb {
  // The token was "return".
  RETURN = 0,
  // The token was "execute" and the kEnableExecutableHandlers switch is set.
  EXECUTE = 1,
  // Anything else; ParseManifest skips the entry.
  UNKNOWN_VERB = 2,
};
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
73cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)Manifest::Manifest()
74cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    : online_whitelist_all(false),
75cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      did_ignore_intercept_namespaces(false) {
76cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)}
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Manifest::~Manifest() {}
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ParseManifest(const GURL& manifest_url, const char* data, int length,
81cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                   ParseMode parse_mode, Manifest& manifest) {
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is an implementation of the parsing algorithm specified in
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the HTML5 offline web application docs:
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   http://www.w3.org/TR/html5/offline.html
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Do not modify it without consulting those docs.
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Though you might be tempted to convert these wstrings to UTF-8 or
87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // base::string16, this implementation seems simpler given the constraints.
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t kSignature[] = L"CACHE MANIFEST";
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kSignatureLength = arraysize(kSignature) - 1;
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t kChromiumSignature[] = L"CHROMIUM CACHE MANIFEST";
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kChromiumSignatureLength = arraysize(kChromiumSignature) - 1;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(manifest.explicit_urls.empty());
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(manifest.fallback_namespaces.empty());
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(manifest.online_whitelist_namespaces.empty());
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(!manifest.online_whitelist_all);
98cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  DCHECK(!manifest.did_ignore_intercept_namespaces);
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Mode mode = EXPLICIT;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::wstring data_string;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(jennb): cannot do UTF8ToWide(data, length, &data_string);
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // until UTF8ToWide uses 0xFFFD Unicode replacement character.
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::CodepageToWide(std::string(data, length), base::kCodepageUTF8,
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                       base::OnStringConversionError::SUBSTITUTE, &data_string);
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t* p = data_string.c_str();
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const wchar_t* end = p + data_string.length();
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?"
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Example: "CACHE MANIFEST #comment" is a valid signature.
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Example: "CACHE MANIFEST;V2" is not.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // When the input data starts with a UTF-8 Byte-Order-Mark
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Unicode BOM (U+FEFF). Skip a converted Unicode BOM if it exists.
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int bom_offset = 0;
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!data_string.empty() && data_string[0] == 0xFEFF) {
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bom_offset = 1;
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++p;
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p >= end)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check for a supported signature and skip p past it.
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (0 == data_string.compare(bom_offset, kSignatureLength,
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               kSignature)) {
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    p += kSignatureLength;
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (0 == data_string.compare(bom_offset, kChromiumSignatureLength,
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      kChromiumSignature)) {
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    p += kChromiumSignatureLength;
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Character after "CACHE MANIFEST" must be whitespace.
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Skip to the end of the line.
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (p < end && *p != '\r' && *p != '\n')
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++p;
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (1) {
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Skip whitespace
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++p;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (p == end)
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const wchar_t* line_start = p;
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Find the end of the line
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (p < end && *p != '\r' && *p != '\n')
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++p;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Check if we have a comment
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*line_start == '#')
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Get rid of trailing whitespace
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const wchar_t* tmp = p - 1;
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (tmp > line_start && (*tmp == ' ' || *tmp == '\t'))
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      --tmp;
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::wstring line(line_start, tmp - line_start + 1);
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (line == L"CACHE:") {
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = EXPLICIT;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"FALLBACK:") {
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = FALLBACK;
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"NETWORK:") {
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = ONLINE_WHITELIST;
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"CHROMIUM-INTERCEPT:") {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      mode = INTERCEPT;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (*(line.end() - 1) == ':') {
179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      mode = UNKNOWN_MODE;
180c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    } else if (mode == UNKNOWN_MODE) {
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (line == L"*" && mode == ONLINE_WHITELIST) {
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      manifest.online_whitelist_all = true;
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (mode == EXPLICIT || mode == ONLINE_WHITELIST) {
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t *line_p = line.c_str();
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t *line_end = line_p + line.length();
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the URL from subsequent ignored tokens.
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
193c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 url16;
1945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      base::WideToUTF16(line.c_str(), line_p - line.c_str(), &url16);
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL url = manifest_url.Resolve(url16);
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!url.is_valid())
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (url.has_ref()) {
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        url = url.ReplaceComponents(replacements);
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Scheme component must be the same as the manifest URL's.
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (url.scheme() != manifest_url.scheme()) {
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // See http://code.google.com/p/chromium/issues/detail?id=69594
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We willfully violate the HTML5 spec at this point in order
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // to support the appcaching of cross-origin HTTPS resources.
      // Per the spec, EXPLICIT cross-origin HTTPS resources should be
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // ignored here. We've opted for a milder constraint and allow
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // caching unless the resource has a "no-store" header. That
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // condition is enforced in AppCacheUpdateJob.
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (mode == EXPLICIT) {
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        manifest.explicit_urls.insert(url.spec());
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
220c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
221c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        manifest.online_whitelist_namespaces.push_back(
222f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)            Namespace(APPCACHE_NETWORK_NAMESPACE, url, GURL(), is_pattern));
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (mode == INTERCEPT) {
225cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      if (parse_mode != PARSE_MANIFEST_ALLOWING_INTERCEPTS) {
226cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        manifest.did_ignore_intercept_namespaces = true;
227cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        continue;
228cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      }
229cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Lines of the form,
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // <urlnamespace> <intercept_type> <targeturl>
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_p = line.c_str();
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_end = line_p + line.length();
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for first whitespace separating the url namespace from
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // the intercept type.
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (line_p == line_end)
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;  // There was no whitespace separating the URLs.
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
243c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 namespace_url16;
2445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL namespace_url = manifest_url.Resolve(namespace_url16);
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!namespace_url.is_valid())
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (namespace_url.has_ref()) {
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        namespace_url = namespace_url.ReplaceComponents(replacements);
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // The namespace URL must have the same scheme, host and port
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // as the manifest's URL.
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != namespace_url.GetOrigin())
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip whitespace separating namespace from the type.
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the type from the target url.
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* type_start = line_p;
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for a type value we understand, otherwise skip the line.
269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      InterceptVerb verb = UNKNOWN_VERB;
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      std::wstring type(type_start, line_p - type_start);
271c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (type == L"return") {
272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        verb = RETURN;
273c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      } else if (type == L"execute" &&
274c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                 CommandLine::ForCurrentProcess()->HasSwitch(
275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    kEnableExecutableHandlers)) {
276c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        verb = EXECUTE;
277c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      }
278c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (verb == UNKNOWN_VERB)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip whitespace separating type from the target_url.
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the URL from subsequent ignored tokens.
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* target_url_start = line_p;
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
290c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 target_url16;
2915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      base::WideToUTF16(target_url_start, line_p - target_url_start,
2925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                        &target_url16);
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL target_url = manifest_url.Resolve(target_url16);
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!target_url.is_valid())
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (target_url.has_ref()) {
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        target_url = target_url.ReplaceComponents(replacements);
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != target_url.GetOrigin())
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
305c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      manifest.intercept_namespaces.push_back(
307f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          Namespace(APPCACHE_INTERCEPT_NAMESPACE, namespace_url,
308c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    target_url, is_pattern, verb == EXECUTE));
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (mode == FALLBACK) {
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_p = line.c_str();
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* line_end = line_p + line.length();
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace separating the two URLs
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (line_p == line_end) {
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // There was no whitespace separating the URLs.
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
322c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 namespace_url16;
3235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      base::WideToUTF16(line.c_str(), line_p - line.c_str(), &namespace_url16);
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL namespace_url = manifest_url.Resolve(namespace_url16);
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!namespace_url.is_valid())
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (namespace_url.has_ref()) {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        namespace_url = namespace_url.ReplaceComponents(replacements);
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Fallback namespace URL must have the same scheme, host and port
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // as the manifest's URL.
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != namespace_url.GetOrigin()) {
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip whitespace separating fallback namespace from URL.
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && (*line_p == '\t' || *line_p == ' '))
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Look for whitespace ending the URL; trailing text is inspected below.
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const wchar_t* fallback_start = line_p;
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (line_p < line_end && *line_p != '\t' && *line_p != ' ')
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++line_p;
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
348c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      base::string16 fallback_url16;
3495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      base::WideToUTF16(fallback_start, line_p - fallback_start,
3505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                        &fallback_url16);
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GURL fallback_url = manifest_url.Resolve(fallback_url16);
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!fallback_url.is_valid())
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (fallback_url.has_ref()) {
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GURL::Replacements replacements;
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        replacements.ClearRef();
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        fallback_url = fallback_url.ReplaceComponents(replacements);
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Fallback entry URL must have the same scheme, host and port
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // as the manifest's URL.
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (manifest_url.GetOrigin() != fallback_url.GetOrigin()) {
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
366c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      bool is_pattern = HasPatternMatchingAnnotation(line_p, line_end);
367c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Store regardless of duplicate namespace URL. Only first match
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // will ever be used.
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      manifest.fallback_namespaces.push_back(
371f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          Namespace(APPCACHE_FALLBACK_NAMESPACE, namespace_url,
372c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    fallback_url, is_pattern));
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NOTREACHED();
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace appcache
382