1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// found in the LICENSE file. 4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/logging.h" 6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_canon.h" 7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_canon_internal.h" 8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 90529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochnamespace url { 10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace { 12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// For reference, here's what IE supports: 14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Key: 0 (disallowed: failure if present in the input) 15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// + (allowed either escaped or unescaped, and unmodified) 16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// U (allowed escaped or unescaped but always unescaped if present in 17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// escaped form) 18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// E (allowed escaped or unescaped but always escaped if present in 19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// unescaped form) 20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// % (only allowed escaped in the input, will be unmodified). 
//      (Alphanumeric characters are left blank in the table below.)
//
//    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
//    -----------------------------------------------
// 0   0  E  E  E  E  E  E  E  E  E  E  E  E  E  E  E
// 1   E  E  E  E  E  E  E  E  E  E  E  E  E  E  E  E
// 2   E  +  E  E  +  E  +  +  +  +  +  +  +  U  U  0
// 3                                 %  %  E  +  E  0  <-- Those are  : ; < = > ?
// 4   %
// 5                                    U  0  U  U  U  <-- Those are  [ \ ] ^ _
// 6   E                                               <-- That's  `
// 7                                    E  E  E  U  E  <-- Those are { | } ~ (UNPRINTABLE)
//
// NOTE: I didn't actually test all the control characters. Some may be
// disallowed in the input, but they are all accepted escaped except for 0.
// I also didn't test if characters affecting HTML parsing are allowed
// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
// Surprisingly, space is accepted in the input and always escaped.

// This table lists the canonical version of all characters we allow in the
// input, with 0 indicating it is disallowed. We use the magic kEsc value to
// indicate that this character should be escaped. We are a little more
// restrictive than IE, but less restrictive than Firefox.
//
// Note that we disallow the % character. We will allow it when part of an
// escape sequence, of course, but this disallows "%25". Even though IE allows
// it, allowing it would put us in a funny state. If there was an invalid
// escape sequence like "%zz", we'll add "%25zz" to the output and fail.
// Allowing percents means we'll succeed a second time, so validity would change
// based on how many times you run the canonicalizer. We prefer to always report
// the same validity, so reject this.
// Sentinel stored in kHostCharLookup for characters that are accepted in the
// input but must be written percent-escaped.
const unsigned char kEsc = 0xFF;

// Canonicalization table indexed by a 7-bit character code. Each entry is
// one of:
//   0     - the character is not allowed in a host,
//   kEsc  - the character is allowed but must be percent-escaped,
//   other - the canonical character to emit (upper-case letters map to their
//           lower-case form).
const unsigned char kHostCharLookup[128] = {
    // 0x00-0x0f: control characters, all invalid.
    0,    0,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0,    0,    0,
    // 0x10-0x1f: control characters, all invalid.
    0,    0,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0,    0,    0,
    // 0x20-0x2f:  ' '  !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /
    kEsc, kEsc, kEsc, kEsc, kEsc, 0,    kEsc, kEsc,
    kEsc, kEsc, kEsc, '+',  kEsc, '-',  '.',  0,
    // 0x30-0x3f:  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
    '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
    '8',  '9',  ':',  0,    kEsc, kEsc, kEsc, 0,
    // 0x40-0x4f:  @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
    kEsc, 'a',  'b',  'c',  'd',  'e',  'f',  'g',
    'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
    // 0x50-0x5f:  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
    'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
    'x',  'y',  'z',  '[',  0,    ']',  0,    '_',
    // 0x60-0x6f:  `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o
    kEsc, 'a',  'b',  'c',  'd',  'e',  'f',  'g',
    'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
    // 0x70-0x7f:  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  DEL
    'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
    'x',  'y',  'z',  kEsc, kEsc, kEsc, 0,    0};
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)const int kTempHostBufferLen = 1024; 71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer; 727d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)typedef RawCanonOutputT<base::char16, kTempHostBufferLen> StackBufferW; 73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Scans a host name and fills in the output flags according to what we find. 75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |has_non_ascii| will be true if there are any non-7-bit characters, and 76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |has_escaped| will be true if there is a percent sign. 77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR, typename UCHAR> 780529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochvoid ScanHostname(const CHAR* spec, 790529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& host, 800529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch bool* has_non_ascii, 810529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch bool* has_escaped) { 82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int end = host.end(); 83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *has_non_ascii = false; 84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *has_escaped = false; 85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = host.begin; i < end; i++) { 86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (static_cast<UCHAR>(spec[i]) >= 0x80) 87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *has_non_ascii = true; 88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else if (spec[i] == '%') 
89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *has_escaped = true; 90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Canonicalizes a host name that is entirely 8-bit characters (even though 94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the type holding them may be 16 bits. Escaped characters will be unescaped. 95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Non-7-bit characters (for example, UTF-8) will be passed unchanged. 96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The |*has_non_ascii| flag will be true if there are non-7-bit characters in 98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the output. 99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This function is used in two situations: 101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// * When the caller knows there is no non-ASCII or percent escaped 103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// characters. This is what DoHost does. The result will be a completely 104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// canonicalized host since we know nothing weird can happen (escaped 105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// characters could be unescaped to non-7-bit, so they have to be treated 106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// with suspicion at this point). It does not use the |has_non_ascii| flag. 
107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// * When the caller has an 8-bit string that may need unescaping. 109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// DoComplexHost calls us this situation to do unescaping and validation. 110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// After this, it may do other IDN operations depending on the value of the 111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |*has_non_ascii| flag. 112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The return value indicates if the output is a potentially valid host name. 114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename INCHAR, typename OUTCHAR> 115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoSimpleHost(const INCHAR* host, 116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int host_len, 117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutputT<OUTCHAR>* output, 118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool* has_non_ascii) { 119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *has_non_ascii = false; 120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool success = true; 122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = 0; i < host_len; ++i) { 123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned int source = host[i]; 124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (source == '%') { 125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Unescape first, if possible. 126c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Source will be used only if decode operation was successful. 
127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!DecodeEscaped(host, &i, host_len, 128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) reinterpret_cast<unsigned char*>(&source))) { 129c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Invalid escaped character. There is nothing that can make this 130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // host valid. We append an escaped percent so the URL looks reasonable 131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // and mark as failed. 132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AppendEscapedChar('%', output); 133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success = false; 134c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) continue; 135c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 136c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 137c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 138c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (source < 0x80) { 139c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We have ASCII input, we can use our lookup table. 140c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned char replacement = kHostCharLookup[source]; 141c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!replacement) { 142c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Invalid character, add it as percent-escaped and mark as failed. 143c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AppendEscapedChar(source, output); 144c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success = false; 145c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else if (replacement == kEsc) { 146c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // This character is valid but should be escaped. 
147c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AppendEscapedChar(source, output); 148c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Common case, the given character is valid in a hostname, the lookup 150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // table tells us the canonical representation of that character (lower 151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // cased). 152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back(replacement); 153c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // It's a non-ascii char. Just push it to the output. 156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // In case where we have char16 input, and char output it's safe to 157c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // cast char16->char only if input string was converted to ASCII. 158c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back(static_cast<OUTCHAR>(source)); 159c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *has_non_ascii = true; 160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 163c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return success; 164c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 165c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 166c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Canonicalizes a host that requires IDN conversion. 
Returns true on success 1677d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool DoIDNHost(const base::char16* src, int src_len, CanonOutput* output) { 168c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We need to escape URL before doing IDN conversion, since punicode strings 169c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // cannot be escaped after they are created. 170c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) RawCanonOutputW<kTempHostBufferLen> url_escaped_host; 171c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool has_non_ascii; 172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii); 173c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 174c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) StackBufferW wide_output; 175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!IDNToASCII(url_escaped_host.data(), 176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) url_escaped_host.length(), 177c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) &wide_output)) { 178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Some error, give up. This will write some reasonable looking 179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // representation of the string to the output. 180c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AppendInvalidNarrowString(src, 0, src_len, output); 181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; 182c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 183c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Now we check the ASCII output like a normal host. It will also handle 185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // unescaping. 
Although we unescaped everything before this function call, if 186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // somebody does %00 as fullwidth, ICU will convert this to ASCII. 187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool success = DoSimpleHost(wide_output.data(), 188c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) wide_output.length(), 189c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, &has_non_ascii); 190c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(!has_non_ascii); 191c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return success; 192c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 193c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 194c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 8-bit convert host to its ASCII version: this converts the UTF-8 input to 195c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// UTF-16. The has_escaped flag should be set if the input string requires 196c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// unescaping. 197c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoComplexHost(const char* host, int host_len, 198c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool has_non_ascii, bool has_escaped, CanonOutput* output) { 199c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Save the current position in the output. We may write stuff and rewind it 200c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // below, so we need to know where to rewind to. 201c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int begin_length = output->length(); 202c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 203c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Points to the UTF-8 data we want to convert. 
This will either be the 204c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // input or the unescaped version written to |*output| if necessary. 205c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const char* utf8_source; 206c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int utf8_source_len; 207c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (has_escaped) { 208c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Unescape before converting to UTF-16 for IDN. We write this into the 209c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // output because it most likely does not require IDNization, and we can 210c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // save another huge stack buffer. It will be replaced below if it requires 211c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // IDN. This will also update our non-ASCII flag so we know whether the 212c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // unescaped input requires IDN. 213c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) { 214c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Error with some escape sequence. We'll call the current output 215c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // complete. DoSimpleHost will have written some "reasonable" output. 216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; 217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Unescaping may have left us with ASCII input, in which case the 220c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // unescaped version we wrote to output is complete. 
221c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!has_non_ascii) { 222c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return true; 223c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 224c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 225c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Save the pointer into the data was just converted (it may be appended to 226c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // other data in the output buffer). 227c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) utf8_source = &output->data()[begin_length]; 228c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) utf8_source_len = output->length() - begin_length; 229c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 230c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We don't need to unescape, use input for IDNization later. (We know the 231c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // input has non-ASCII, or the simple version would have been called 232c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // instead of us.) 233c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) utf8_source = host; 234c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) utf8_source_len = host_len; 235c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 236c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 237c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion. 238c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Above, we may have used the output to write the unescaped values to, so 239c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // we have to rewind it to where we started after we convert it to UTF-16. 
240c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) StackBufferW utf16; 241c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) { 242c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // In this error case, the input may or may not be the output. 243c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) StackBuffer utf8; 244c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = 0; i < utf8_source_len; i++) 245c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) utf8.push_back(utf8_source[i]); 246c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->set_length(begin_length); 247c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output); 248c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; 249c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 250c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->set_length(begin_length); 251c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 252c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // This will call DoSimpleHost which will do normal ASCII canonicalization 253c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // and also check for IP addresses in the outpt. 254c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoIDNHost(utf16.data(), utf16.length(), output); 255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// UTF-16 convert host to its ASCII version. The set up is already ready for 258c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the backend, so we just pass through. 
The has_escaped flag should be set if 259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the input string requires unescaping. 2607d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool DoComplexHost(const base::char16* host, int host_len, 261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool has_non_ascii, bool has_escaped, CanonOutput* output) { 262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (has_escaped) { 263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Yikes, we have escaped characters with wide input. The escaped 264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // characters should be interpreted as UTF-8. To solve this problem, 265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // we convert to UTF-8, unescape, then convert back to UTF-16 for IDN. 266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // 267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We don't bother to optimize the conversion in the ASCII case (which 268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // *could* just be a copy) and use the UTF-8 path, because it should be 269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // very rare that host names have escaped characters, and it is relatively 270c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // fast to do the conversion anyway. 
271c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) StackBuffer utf8; 272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!ConvertUTF16ToUTF8(host, host_len, &utf8)) { 273c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AppendInvalidNarrowString(host, 0, host_len, output); 274c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; 275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 276c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 277c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Once we convert to UTF-8, we can use the 8-bit version of the complex 278c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // host handling code above. 279c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii, 280c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) has_escaped, output); 281c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 282c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 283c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No unescaping necessary, we can safely pass the input to ICU. This 284c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // function will only get called if we either have escaped or non-ascii 285c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // input, so it's safe to just use ICU now. Even if the input is ASCII, 286c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // this function will do the right thing (just slower than we could). 
287c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoIDNHost(host, host_len, output); 288c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 289c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 290c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR, typename UCHAR> 291c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoHost(const CHAR* spec, 2920529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& host, 293c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 294c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonHostInfo* host_info) { 295c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (host.len <= 0) { 296c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Empty hosts don't need anything. 297c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) host_info->family = CanonHostInfo::NEUTRAL; 2980529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch host_info->out_host = Component(); 299c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 300c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 301c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 302c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool has_non_ascii, has_escaped; 303c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped); 304c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 305c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Keep track of output's initial length, so we can rewind later. 
306c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const int output_begin = output->length(); 307c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 308c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool success; 309c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!has_non_ascii && !has_escaped) { 310c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success = DoSimpleHost(&spec[host.begin], host.len, 311c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, &has_non_ascii); 312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(!has_non_ascii); 313c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 314c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success = DoComplexHost(&spec[host.begin], host.len, 315c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) has_non_ascii, has_escaped, output); 316c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 317c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 318c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!success) { 319c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Canonicalization failed. Set BROKEN to notify the caller. 320c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) host_info->family = CanonHostInfo::BROKEN; 321c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 322c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // After all the other canonicalization, check if we ended up with an IP 323c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // address. IP addresses are small, so writing into this temporary buffer 324c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // should not cause an allocation. 
325c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) RawCanonOutput<64> canon_ip; 326c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonicalizeIPAddress(output->data(), 3270529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch MakeRange(output_begin, output->length()), 328c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) &canon_ip, host_info); 329c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 330c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // If we got an IPv4/IPv6 address, copy the canonical form back to the 331c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // real buffer. Otherwise, it's a hostname or broken IP, in which case 332c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // we just leave it in place. 333c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (host_info->IsIPAddress()) { 334c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->set_length(output_begin); 335c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->Append(canon_ip.data(), canon_ip.length()); 336c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 337c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 3390529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch host_info->out_host = MakeRange(output_begin, output->length()); 340c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 341c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 342c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} // namespace 343c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 344c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool CanonicalizeHost(const char* spec, 3450529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& host, 346c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 
3470529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component* out_host) { 348c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonHostInfo host_info; 349c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoHost<char, unsigned char>(spec, host, output, &host_info); 350c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *out_host = host_info.out_host; 351c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return (host_info.family != CanonHostInfo::BROKEN); 352c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 353c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 3547d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool CanonicalizeHost(const base::char16* spec, 3550529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& host, 356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 3570529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component* out_host) { 358c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonHostInfo host_info; 3597d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) DoHost<base::char16, base::char16>(spec, host, output, &host_info); 360c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *out_host = host_info.out_host; 361c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return (host_info.family != CanonHostInfo::BROKEN); 362c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 363c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 364c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void CanonicalizeHostVerbose(const char* spec, 3650529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& host, 366c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 3670529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch CanonHostInfo* host_info) { 368c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoHost<char, 
unsigned char>(spec, host, output, host_info); 369c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 370c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 3717d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void CanonicalizeHostVerbose(const base::char16* spec, 3720529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& host, 373c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 3740529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch CanonHostInfo* host_info) { 3757d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) DoHost<base::char16, base::char16>(spec, host, output, host_info); 376c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 377c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 3780529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch} // namespace url 379