1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// found in the LICENSE file. 4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#ifndef URL_URL_CANON_H_ 6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#define URL_URL_CANON_H_ 7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include <stdlib.h> 9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include <string.h> 10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 117d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "base/strings/string16.h" 12868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "url/url_export.h" 13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_parse.h" 14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace url_canon { 16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Canonicalizer output ------------------------------------------------------- 18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Base class for the canonicalizer output, this maintains a buffer and 20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// supports simple resizing and append operations on it. 
//
// It is VERY IMPORTANT that no virtual function calls be made on the common
// code path. We only have two virtual function calls, the destructor and a
// resize function that is called when the existing buffer is not big enough.
// The derived class is then in charge of setting up our buffer which we will
// manage.
template<typename T>
class CanonOutputT {
 public:
  // Starts with no buffer at all; a derived class installs one (or the first
  // write triggers Resize() through Grow()).
  CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) {
  }
  virtual ~CanonOutputT() {
  }

  // Implemented to resize the buffer. This function should update the buffer
  // pointer to point to the new buffer, and any old data up to |cur_len_| in
  // the buffer must be copied over.
  //
  // The new size |sz| must be larger than buffer_len_.
  virtual void Resize(int sz) = 0;

  // Accessor for returning a character at a given position. The input offset
  // must be in the valid range; no bounds check is performed.
  //
  // Returns |T| (not |char|) so that wide-character outputs are not truncated
  // to their low byte.
  inline T at(int offset) const {
    return buffer_[offset];
  }

  // Sets the character at the given position. The given position MUST be less
  // than the length(). |ch| is converted to |T| on assignment.
  inline void set(int offset, int ch) {
    buffer_[offset] = ch;
  }

  // Returns the number of characters currently in the buffer.
  inline int length() const {
    return cur_len_;
  }

  // Returns the current capacity of the buffer. The length() is the number of
  // characters that have been declared to be written, but the capacity() is
  // the number that can be written without reallocation. If the caller must
  // write many characters at once, it can make sure there is enough capacity,
  // write the data, then use set_length() to declare the new length().
  int capacity() const {
    return buffer_len_;
  }

  // Called by the user of this class to get the output. The output will NOT
  // be NULL-terminated. Call length() to get the length.
  const T* data() const {
    return buffer_;
  }
  T* data() {
    return buffer_;
  }

  // Shortens the URL to the new length. Used for "backing up" when processing
  // relative paths. This can also be used if an external function writes a lot
  // of data to the buffer (when using the "Raw" version below) beyond the end,
  // to declare the new length.
  //
  // This MUST NOT be used to expand the size of the buffer beyond capacity().
  void set_length(int new_len) {
    cur_len_ = new_len;
  }

  // This is the most performance critical function, since it is called for
  // every character.
  void push_back(T ch) {
    // In VC2005, putting this common case first speeds up execution
    // dramatically because this branch is predicted as taken.
    if (cur_len_ < buffer_len_) {
      buffer_[cur_len_] = ch;
      cur_len_++;
      return;
    }

    // Grow the buffer to hold at least one more item. Hopefully we won't have
    // to do this very often. If growing fails the character is dropped.
    if (!Grow(1))
      return;

    // Actually do the insertion.
    buffer_[cur_len_] = ch;
    cur_len_++;
  }

  // Appends the given string to the output. If the buffer cannot be grown far
  // enough, nothing is appended and the length is left unchanged.
  void Append(const T* str, int str_len) {
    // Compare against the remaining room instead of computing
    // |cur_len_ + str_len|, which would overflow for a very large |str_len|.
    const int available = buffer_len_ - cur_len_;
    if (str_len > available) {
      if (!Grow(str_len - available))
        return;
    }
    for (int i = 0; i < str_len; i++)
      buffer_[cur_len_ + i] = str[i];
    cur_len_ += str_len;
  }

 protected:
  // Grows the given buffer so that it can fit at least |min_additional|
  // characters. Returns true if the buffer could be resized, false on OOM.
  //
  // The capacity is doubled (starting from kMinBufferLen when there is no
  // buffer yet) until the extra room covers |min_additional|; the request is
  // refused once the size would reach 2^30 before doubling.
  bool Grow(int min_additional) {
    static const int kMinBufferLen = 16;
    int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
    do {
      if (new_len >= (1 << 30))  // Prevent overflow below.
        return false;
      new_len *= 2;
      // |new_len| never drops below |buffer_len_|, so the subtraction cannot
      // overflow (unlike |buffer_len_ + min_additional|).
    } while (new_len - buffer_len_ < min_additional);
    Resize(new_len);
    return true;
  }

  T* buffer_;
  int buffer_len_;

  // Used characters in the buffer.
  int cur_len_;
};

// Simple implementation of the CanonOutput using new[].
This class 143c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// also supports a static buffer so if it is allocated on the stack, most 144c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// URLs can be canonicalized with no heap allocations. 145c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename T, int fixed_capacity = 1024> 146c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)class RawCanonOutputT : public CanonOutputT<T> { 147c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) public: 148c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) RawCanonOutputT() : CanonOutputT<T>() { 149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) this->buffer_ = fixed_buffer_; 150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) this->buffer_len_ = fixed_capacity; 151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) virtual ~RawCanonOutputT() { 153c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (this->buffer_ != fixed_buffer_) 154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) delete[] this->buffer_; 155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 157c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) virtual void Resize(int sz) { 158c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) T* new_buf = new T[sz]; 159c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) memcpy(new_buf, this->buffer_, 160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sizeof(T) * (this->cur_len_ < sz ? 
this->cur_len_ : sz)); 161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (this->buffer_ != fixed_buffer_) 162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) delete[] this->buffer_; 163c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) this->buffer_ = new_buf; 164c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) this->buffer_len_ = sz; 165c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 166c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 167c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) protected: 168c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) T fixed_buffer_[fixed_capacity]; 169c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}; 170c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 171c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Normally, all canonicalization output is in narrow characters. We support 172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the templates so it can also be used internally if a wide buffer is 173c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// required. 
174c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)typedef CanonOutputT<char> CanonOutput; 1757d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)typedef CanonOutputT<base::char16> CanonOutputW; 176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 177c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<int fixed_capacity> 178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {}; 179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<int fixed_capacity> 1807d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {}; 181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 182c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Character set converter ---------------------------------------------------- 183c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Converts query strings into a custom encoding. The embedder can supply an 185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// implementation of this class to interface with their own character set 186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// conversion libraries. 187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 188c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Embedders will want to see the unit test for the ICU version. 
189c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 190868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)class URL_EXPORT CharsetConverter { 191c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) public: 192c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CharsetConverter() {} 193c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) virtual ~CharsetConverter() {} 194c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 195c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Converts the given input string from UTF-16 to whatever output format the 196c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // converter supports. This is used only for the query encoding conversion, 197c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // which does not fail. Instead, the converter should insert "invalid 198c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // character" characters in the output for invalid sequences, and do the 199c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // best it can. 200c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // 201c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // If the input contains a character not representable in the output 202c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // character set, the converter should append the HTML entity sequence in 203c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // decimal, (such as "你") with escaping of the ampersand, number 204c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // sign, and semicolon (in the previous example it would be 205c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // "%26%2320320%3B"). This rule is based on what IE does in this situation. 
2067d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) virtual void ConvertFromUTF16(const base::char16* input, 207c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int input_len, 208c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output) = 0; 209c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}; 210c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 211c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Whitespace ----------------------------------------------------------------- 212c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 213c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Searches for whitespace that should be removed from the middle of URLs, and 214c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// removes it. Removed whitespace are tabs and newlines, but NOT spaces. Spaces 215c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// are preserved, which is what most browsers do. A pointer to the output will 216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// be returned, and the length of that output will be in |output_len|. 217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This should be called before parsing if whitespace removal is desired (which 219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// it normally is when you are canonicalizing). 220c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 221c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// If no whitespace is removed, this function will not use the buffer and will 222c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// return a pointer to the input, to avoid the extra copy. 
If modification is 223c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// required, the given |buffer| will be used and the returned pointer will 224c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// point to the beginning of the buffer. 225c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 2267d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// Therefore, callers should not use the buffer, since it may actually be empty, 227c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// use the computed pointer and |*output_len| instead. 228868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len, 229868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutputT<char>* buffer, 230868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int* output_len); 2317d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT const base::char16* RemoveURLWhitespace( 2327d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const base::char16* input, 2337d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) int input_len, 2347d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) CanonOutputT<base::char16>* buffer, 2357d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) int* output_len); 236c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 237c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// IDN ------------------------------------------------------------------------ 238c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 239c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Converts the Unicode input representing a hostname to ASCII using IDN rules. 240c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The output must fall in the ASCII range, but will be encoded in UTF-16. 
241c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 242c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// On success, the output will be filled with the ASCII host name and it will 243c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// return true. Unlike most other canonicalization functions, this assumes that 244c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the output is empty. The beginning of the host will be at offset 0, and 245c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the length of the output will be set to the length of the new host name. 246c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 247c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// On error, returns false. The output in this case is undefined. 2487d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool IDNToASCII(const base::char16* src, 249868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int src_len, 250868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutputW* output); 251c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 252c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Piece-by-piece canonicalizers ---------------------------------------------- 253c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 254c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// These individual canonicalizers append the canonicalized versions of the 255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// corresponding URL component to the given std::string. The spec and the 256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// previously-identified range of that component are the input. The range of 257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the canonicalized component will be written to the output component. 
258c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// These functions all append to the output so they can be chained. Make sure 260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the output is empty when you start. 261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// These functions returns boolean values indicating success. On failure, they 263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// will attempt to write something reasonable to the output so that, if 264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// displayed to the user, they will recognise it as something that's messed up. 265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Nothing more should ever be done with these invalid URLs, however. 266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Scheme: Appends the scheme and colon to the URL. The output component will 268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// indicate the range of characters up to but not including the colon. 269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 270c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Canonical URLs always have a scheme. If the scheme is not present in the 271c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// input, this will just write the colon to indicate an empty scheme. Does not 272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// append slashes which will be needed before any authority components for most 273c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// URLs. 
274c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version requires UTF-8 encoding. 276868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeScheme(const char* spec, 277868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& scheme, 278868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 279868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_scheme); 2807d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeScheme(const base::char16* spec, 281868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& scheme, 282868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 283868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_scheme); 284c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 285c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// User info: username/password. If present, this will add the delimiters so 286c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the output will be "<username>:<password>@" or "<username>@". Empty 287c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// username/password pairs, or empty passwords, will get converted to 288c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// nonexistant in the canonical version. 289c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 290c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The components for the username and password refer to ranges in the 291c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// respective source strings. 
Usually, these will be the same string, which 292c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// is legal as long as the two components don't overlap. 293c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 294c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version requires UTF-8 encoding. 295868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeUserInfo(const char* username_source, 296868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& username, 297868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const char* password_source, 298868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& password, 299868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 300868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_username, 301868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_password); 3027d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeUserInfo(const base::char16* username_source, 303868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& username, 3047d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const base::char16* password_source, 305868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& password, 306868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 307868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_username, 308868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_password); 309c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 310c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 311c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This 
structure holds detailed state exported from the IP/Host canonicalizers. 312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Additional fields may be added as callers require them. 313c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)struct CanonHostInfo { 314c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {} 315c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 316c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Convenience function to test if family is an IP address. 317c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsIPAddress() const { return family == IPV4 || family == IPV6; } 318c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 319c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // This field summarizes how the input was classified by the canonicalizer. 320c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) enum Family { 321c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) NEUTRAL, // - Doesn't resemble an IP address. As far as the IP 322c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // canonicalizer is concerned, it should be treated as a 323c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // hostname. 324c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) BROKEN, // - Almost an IP, but was not canonicalized. This could be an 325c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // IPv4 address where truncation occurred, or something 326c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // containing the special characters :[] which did not parse 327c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // as an IPv6 address. Never attempt to connect to this 328c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // address, because it might actually succeed! 
329c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) IPV4, // - Successfully canonicalized as an IPv4 address. 330c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) IPV6, // - Successfully canonicalized as an IPv6 address. 331c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) }; 332c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Family family; 333c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 334c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // If |family| is IPV4, then this is the number of nonempty dot-separated 335c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // components in the input text, from 1 to 4. If |family| is not IPV4, 336c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // this value is undefined. 337c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int num_ipv4_components; 338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 339c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Location of host within the canonicalized output. 340c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6. 341c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // CanonicalizeHostVerbose() always sets it. 342c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) url_parse::Component out_host; 343c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 344c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // |address| contains the parsed IP Address (if any) in its first 345c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // AddressLength() bytes, in network order. If IsIPAddress() is false 346c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // AddressLength() will return zero and the content of |address| is undefined. 
347c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned char address[16]; 348c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 349c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Convenience function to calculate the length of an IP address corresponding 350c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // to the current IP version in |family|, if any. For use with |address|. 351c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int AddressLength() const { 352c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0); 353c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 354c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}; 355c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 357c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Host. 358c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 359c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version requires UTF-8 encoding. Use this version when you only 360c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// need to know whether canonicalization succeeded. 
361868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeHost(const char* spec, 362868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& host, 363868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 364868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_host); 3657d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeHost(const base::char16* spec, 366868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& host, 367868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 368868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_host); 369c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 370c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Extended version of CanonicalizeHost, which returns additional information. 371c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use this when you need to know whether the hostname was an IP address. 372c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// A successful return is indicated by host_info->family != BROKEN. See the 373c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// definition of CanonHostInfo above for details. 
374868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT void CanonicalizeHostVerbose(const char* spec, 375868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& host, 376868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 377868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonHostInfo* host_info); 3787d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec, 379868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& host, 380868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 381868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonHostInfo* host_info); 382c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 383c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 384c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// IP addresses. 385c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 386c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Tries to interpret the given host name as an IPv4 or IPv6 address. If it is 387c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// an IP address, it will canonicalize it as such, appending it to |output|. 388c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Additional status information is returned via the |*host_info| parameter. 389c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// See the definition of CanonHostInfo above for details. 390c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 391c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This is called AUTOMATICALLY from the host canonicalizer, which ensures that 392c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the input is unescaped and name-prepped, etc. 
It should not normally be 393c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// necessary or wise to call this directly. 394868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT void CanonicalizeIPAddress(const char* spec, 395868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& host, 396868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 397868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonHostInfo* host_info); 3987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT void CanonicalizeIPAddress(const base::char16* spec, 399868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& host, 400868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 401868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonHostInfo* host_info); 402c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 403c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Port: this function will add the colon for the port if a port is present. 404c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The caller can pass url_parse::PORT_UNSPECIFIED as the 405c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// default_port_for_scheme argument if there is no default port. 406c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 407c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version requires UTF-8 encoding. 
408868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizePort(const char* spec, 409868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& port, 410868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int default_port_for_scheme, 411868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 412868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_port); 4137d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizePort(const base::char16* spec, 414868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& port, 415868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int default_port_for_scheme, 416868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 417868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_port); 418c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 419c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED 420c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// if the scheme is unknown. 421868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len); 422c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 423c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Path. If the input does not begin in a slash (including if the input is 424c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// empty), we'll prepend a slash to the path to make it canonical. 
425c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 426c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version assumes UTF-8 encoding, but does not verify the validity 427c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid 428c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// characters, etc.). Normally, URLs will come in as UTF-16, so this isn't 429c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// an issue. Somebody giving us an 8-bit path is responsible for generating 430c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the path that the server expects (we'll escape high-bit characters), so 431c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// if something is invalid, it's their problem. 432868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizePath(const char* spec, 433868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& path, 434868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 435868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_path); 4367d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizePath(const base::char16* spec, 437868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& path, 438868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 439868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_path); 440c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 441c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Canonicalizes the input as a file path. This is like CanonicalizePath except 442c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// that it also handles Windows drive specs. 
For example, the path can begin 443c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// with "c|\" and it will get properly canonicalized to "C:/". 444c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The string will be appended to |*output| and |*out_path| will be updated. 445c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 446c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version requires UTF-8 encoding. 447868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool FileCanonicalizePath(const char* spec, 448868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& path, 449868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 450868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_path); 4517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool FileCanonicalizePath(const base::char16* spec, 452868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& path, 453868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 454868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_path); 455c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 456c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Query: Prepends the ? if needed. 457c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 458c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit version requires the input to be UTF-8 encoding. Incorrectly 459c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode 460c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// "invalid character." 
This function can not fail, we always just try to do 461c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// our best for crazy input here since web pages can set it themselves. 462c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 463c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This will convert the given input into the output encoding that the given 464c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// character set converter object provides. The converter will only be called 465c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// if necessary, for ASCII input, no conversions are necessary. 466c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 467c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The converter can be NULL. In this case, the output encoding will be UTF-8. 468868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT void CanonicalizeQuery(const char* spec, 469868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& query, 470868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* converter, 471868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 472868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_query); 4737d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT void CanonicalizeQuery(const base::char16* spec, 474868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& query, 475868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* converter, 476868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 477868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_query); 478c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 479c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard 
Coles)// Ref: Prepends the # if needed. The output will be UTF-8 (this is the only 480c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// canonicalizer that does not produce ASCII output). The output is 481c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// guaranteed to be valid UTF-8. 482c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 483c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use 484c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the "Unicode replacement character" for the confusing bits and copy the rest. 485868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT void CanonicalizeRef(const char* spec, 486868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& path, 487868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 488868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_path); 4897d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT void CanonicalizeRef(const base::char16* spec, 490868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Component& path, 491868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 492868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Component* out_path); 493c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 494c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Full canonicalizer --------------------------------------------------------- 495c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 496c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// These functions replace any string contents, rather than append as above. 
497c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// See the above piece-by-piece functions for information specific to 498c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// canonicalizing individual components. 499c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 500c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The output will be ASCII except the reference fragment, which may be UTF-8. 501c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 502c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The 8-bit versions require UTF-8 encoding. 503c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 504c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use for standard URLs with authorities and paths. 505868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeStandardURL(const char* spec, 506868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 507868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 508868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 509868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 510868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 5117d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec, 512868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 513868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 514868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 515868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 516868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 
517c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 518c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use for file URLs. 519868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeFileURL(const char* spec, 520868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 521868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 522868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 523868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 524868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 5257d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeFileURL(const base::char16* spec, 526868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 527868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 528868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 529868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 530868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 531c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 532c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use for filesystem URLs. 
533868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec, 534868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 535868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 536868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 537868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 538868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 5397d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeFileSystemURL(const base::char16* spec, 540868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 541868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 542868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 543868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 544868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 545c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 546c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use for path URLs such as javascript. This does not modify the path in any 547c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// way, for example, by escaping it. 
548868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizePathURL(const char* spec, 549868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 550868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 551868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 552868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 5537d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizePathURL(const base::char16* spec, 554868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 555868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 556868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 557868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 558c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 559c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use for mailto URLs. This "canonicalizes" the url into a path and query 560c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// component. It does not attempt to merge "to" fields. It uses UTF-8 for 561c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// the query encoding if there is a query. This is because a mailto URL is 562c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// really intended for an external mail program, and the encoding of a page, 563c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// etc. which would influence a query encoding normally are irrelevant. 
564868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool CanonicalizeMailtoURL(const char* spec, 565868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 566868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 567868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 568868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 5697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec, 570868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) int spec_len, 571868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& parsed, 572868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 573868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 574c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 575c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Part replacer -------------------------------------------------------------- 576c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 577c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Internal structure used for storing separate strings for each component. 578c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The basic canonicalization functions use this structure internally so that 579c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// component replacement (different strings for different components) can be 580c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// treated on the same code path as regular canonicalization (the same string 581c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// for each component). 
//
// A url_parse::Parsed structure usually goes along with this. Those
// components identify offsets within these strings, so that they can all be
// in the same string, or spread arbitrarily across different ones.
//
// This structure does not own any data. It is the caller's responsibility to
// ensure that the data the pointers point to stays in scope and is not
// modified.
template<typename CHAR>
struct URLComponentSource {
  // Constructor normally used by callers wishing to replace components. This
  // will make them all NULL, which is no replacement. The caller would then
  // override the components they want to replace.
  URLComponentSource()
      : scheme(NULL),
        username(NULL),
        password(NULL),
        host(NULL),
        port(NULL),
        path(NULL),
        query(NULL),
        ref(NULL) {
  }

  // Constructor normally used internally to initialize all the components to
  // point to the same spec. |default_value| is stored as-is in every member;
  // it is not copied.
  explicit URLComponentSource(const CHAR* default_value)
      : scheme(default_value),
        username(default_value),
        password(default_value),
        host(default_value),
        port(default_value),
        path(default_value),
        query(default_value),
        ref(default_value) {
  }

  // Source string for each component (non-owning).
  const CHAR* scheme;
  const CHAR* username;
  const CHAR* password;
  const CHAR* host;
  const CHAR* port;
  const CHAR* path;
  const CHAR* query;
  const CHAR* ref;
};

// This structure encapsulates information on modifying a URL.
// Each component
// may either be left unchanged, replaced, or deleted.
//
// By default, each component is unchanged. For those components that should be
// modified, call either Set* or Clear* to modify it.
//
// The string passed to Set* functions DOES NOT GET COPIED AND MUST BE KEPT
// IN SCOPE BY THE CALLER for as long as this object exists!
//
// Prefer the 8-bit replacement version if possible since it is more efficient.
639c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 640c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)class Replacements { 641c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) public: 642c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Replacements() { 643c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 644c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 645c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Scheme 646c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetScheme(const CHAR* s, const url_parse::Component& comp) { 647c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.scheme = s; 648c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.scheme = comp; 649c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 650c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Note: we don't have a ClearScheme since this doesn't make any sense. 
651c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsSchemeOverridden() const { return sources_.scheme != NULL; } 652c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 653c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Username 654c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetUsername(const CHAR* s, const url_parse::Component& comp) { 655c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.username = s; 656c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.username = comp; 657c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 658c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearUsername() { 659c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.username = Placeholder(); 660c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.username = url_parse::Component(); 661c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 662c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsUsernameOverridden() const { return sources_.username != NULL; } 663c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 664c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Password 665c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetPassword(const CHAR* s, const url_parse::Component& comp) { 666c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.password = s; 667c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.password = comp; 668c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 669c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearPassword() { 670c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.password = Placeholder(); 671c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.password = url_parse::Component(); 
672c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 673c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsPasswordOverridden() const { return sources_.password != NULL; } 674c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 675c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Host 676c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetHost(const CHAR* s, const url_parse::Component& comp) { 677c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.host = s; 678c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.host = comp; 679c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 680c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearHost() { 681c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.host = Placeholder(); 682c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.host = url_parse::Component(); 683c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 684c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsHostOverridden() const { return sources_.host != NULL; } 685c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 686c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Port 687c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetPort(const CHAR* s, const url_parse::Component& comp) { 688c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.port = s; 689c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.port = comp; 690c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 691c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearPort() { 692c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.port = Placeholder(); 693c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.port = url_parse::Component(); 
694c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 695c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsPortOverridden() const { return sources_.port != NULL; } 696c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 697c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Path 698c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetPath(const CHAR* s, const url_parse::Component& comp) { 699c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.path = s; 700c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.path = comp; 701c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 702c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearPath() { 703c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.path = Placeholder(); 704c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.path = url_parse::Component(); 705c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 706c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsPathOverridden() const { return sources_.path != NULL; } 707c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 708c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Query 709c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetQuery(const CHAR* s, const url_parse::Component& comp) { 710c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.query = s; 711c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.query = comp; 712c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 713c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearQuery() { 714c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.query = Placeholder(); 715c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.query = url_parse::Component(); 
716c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 717c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsQueryOverridden() const { return sources_.query != NULL; } 718c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 719c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Ref 720c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void SetRef(const CHAR* s, const url_parse::Component& comp) { 721c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.ref = s; 722c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.ref = comp; 723c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 724c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) void ClearRef() { 725c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) sources_.ref = Placeholder(); 726c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) components_.ref = url_parse::Component(); 727c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 728c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool IsRefOverridden() const { return sources_.ref != NULL; } 729c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 730c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Getters for the itnernal data. See the variables below for how the 731c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // information is encoded. 
732c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const URLComponentSource<CHAR>& sources() const { return sources_; } 733c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const url_parse::Parsed& components() const { return components_; } 734c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 735c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) private: 736c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Returns a pointer to a static empty string that is used as a placeholder 737c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // to indicate a component should be deleted (see below). 738c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const CHAR* Placeholder() { 739c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) static const CHAR empty_string = 0; 740c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return &empty_string; 741c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 742c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 743c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We support three states: 744c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // 745c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Action | Source Component 746c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // -----------------------+-------------------------------------------------- 747c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Don't change component | NULL (unused) 748c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Replace component | (replacement string) (replacement component) 749c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Delete component | (non-NULL) (invalid component: (0,-1)) 750c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // 751c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We use a pointer 
to the empty string for the source when the component 752c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // should be deleted. 753c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<CHAR> sources_; 754c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) url_parse::Parsed components_; 755c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}; 756c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 757c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The base must be an 8-bit canonical URL. 758868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)URL_EXPORT bool ReplaceStandardURL(const char* base, 759868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const url_parse::Parsed& base_parsed, 760868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const Replacements<char>& replacements, 761868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CharsetConverter* query_converter, 762868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) CanonOutput* output, 763868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) url_parse::Parsed* new_parsed); 7647d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)URL_EXPORT bool ReplaceStandardURL( 7657d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const char* base, 7667d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const url_parse::Parsed& base_parsed, 7677d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const Replacements<base::char16>& replacements, 7687d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) CharsetConverter* query_converter, 7697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) CanonOutput* output, 7707d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) url_parse::Parsed* new_parsed); 771c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 772c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 
// Filesystem URLs can only have the path, query, or ref replaced.
// All other components will be ignored.
//
// The two overloads differ only in the character type of |replacements|
// (char vs. base::char16); |base| is an 8-bit string in both.
// NOTE(review): |output| and |new_parsed| look like the out-parameters for
// the rewritten URL and its parsed components -- confirm in the
// implementation.
URL_EXPORT bool ReplaceFileSystemURL(const char* base,
                                     const url_parse::Parsed& base_parsed,
                                     const Replacements<char>& replacements,
                                     CharsetConverter* query_converter,
                                     CanonOutput* output,
                                     url_parse::Parsed* new_parsed);
URL_EXPORT bool ReplaceFileSystemURL(
    const char* base,
    const url_parse::Parsed& base_parsed,
    const Replacements<base::char16>& replacements,
    CharsetConverter* query_converter,
    CanonOutput* output,
    url_parse::Parsed* new_parsed);

// Replacing some parts of a file URL is not permitted. Everything except
// the host, path, query, and ref will be ignored.
// Two overloads are declared: one taking 8-bit replacements and one taking
// 16-bit (base::char16) replacements. |base| is an 8-bit string in both.
// NOTE(review): |output| and |new_parsed| look like the out-parameters for
// the rewritten URL and its parsed components -- confirm in the
// implementation.
URL_EXPORT bool ReplaceFileURL(const char* base,
                               const url_parse::Parsed& base_parsed,
                               const Replacements<char>& replacements,
                               CharsetConverter* query_converter,
                               CanonOutput* output,
                               url_parse::Parsed* new_parsed);
URL_EXPORT bool ReplaceFileURL(const char* base,
                               const url_parse::Parsed& base_parsed,
                               const Replacements<base::char16>& replacements,
                               CharsetConverter* query_converter,
                               CanonOutput* output,
                               url_parse::Parsed* new_parsed);

// Path URLs can only have the scheme and path replaced. All other components
// will be ignored.
// Two overloads are declared: one taking 8-bit replacements and one taking
// 16-bit (base::char16) replacements. |base| is an 8-bit string in both.
// Neither overload takes a CharsetConverter.
// NOTE(review): |output| and |new_parsed| look like the out-parameters for
// the rewritten URL and its parsed components -- confirm in the
// implementation.
URL_EXPORT bool ReplacePathURL(const char* base,
                               const url_parse::Parsed& base_parsed,
                               const Replacements<char>& replacements,
                               CanonOutput* output,
                               url_parse::Parsed* new_parsed);
URL_EXPORT bool ReplacePathURL(const char* base,
                               const url_parse::Parsed& base_parsed,
                               const Replacements<base::char16>& replacements,
                               CanonOutput* output,
                               url_parse::Parsed* new_parsed);

// Mailto URLs can only have the scheme, path, and query replaced.
// All other components will be ignored.
// Two overloads are declared: one taking 8-bit replacements and one taking
// 16-bit (base::char16) replacements. |base| is an 8-bit string in both.
// Neither overload takes a CharsetConverter.
// NOTE(review): |output| and |new_parsed| look like the out-parameters for
// the rewritten URL and its parsed components -- confirm in the
// implementation.
URL_EXPORT bool ReplaceMailtoURL(const char* base,
                                 const url_parse::Parsed& base_parsed,
                                 const Replacements<char>& replacements,
                                 CanonOutput* output,
                                 url_parse::Parsed* new_parsed);
URL_EXPORT bool ReplaceMailtoURL(const char* base,
                                 const url_parse::Parsed& base_parsed,
                                 const Replacements<base::char16>& replacements,
                                 CanonOutput* output,
                                 url_parse::Parsed* new_parsed);

// Relative URL ---------------------------------------------------------------

// Given an input URL or URL fragment |fragment|, determines if it is a
// relative or absolute URL and places the result into |*is_relative|. If it is
// relative, the relevant portion of the URL will be placed into
// |*relative_component| (there may have been trimmed whitespace, for example).
// This value is passed to ResolveRelativeURL.
// If the input is not relative, |*relative_component| is UNDEFINED (it may be
// changed by the function).
//
// Returns true on success (we successfully determined the URL is relative or
// not). Failure means that the combination of URLs doesn't make any sense.
//
// The base URL should always be canonical, therefore is ASCII.
//
// The two overloads differ only in the character type of |fragment|
// (char vs. base::char16); |fragment_len| is its length.
// NOTE(review): the meaning of |is_base_hierarchical| is not described here;
// presumably it tells the function whether |base| is a hierarchical URL --
// confirm against the callers.
URL_EXPORT bool IsRelativeURL(const char* base,
                              const url_parse::Parsed& base_parsed,
                              const char* fragment,
                              int fragment_len,
                              bool is_base_hierarchical,
                              bool* is_relative,
                              url_parse::Component* relative_component);
URL_EXPORT bool IsRelativeURL(const char* base,
                              const url_parse::Parsed& base_parsed,
                              const base::char16* fragment,
                              int fragment_len,
                              bool is_base_hierarchical,
                              bool* is_relative,
                              url_parse::Component* relative_component);

// Given a canonical parsed source URL, a URL fragment known to be relative,
// and the identified relevant portion of the relative URL (computed by
// IsRelativeURL), this produces a new parsed canonical URL in |output| and
// |out_parsed|.
//
// It also requires a flag indicating whether the base URL is a file: URL
// which triggers additional logic.
//
// The base URL should be canonical and have a host (may be empty for file
// URLs) and a path. If it doesn't have these, we can't resolve relative
// URLs off of it and will return the base as the output with an error flag.
// Because it is canonical, it should also be ASCII.
//
// The query charset converter follows the same rules as CanonicalizeQuery.
//
// Returns true on success.
// On failure, the output will be "something reasonable" that will be
// consistent and valid, just probably not what was intended by the web page
// author or caller.
//
// Parameters, as far as the declarations and the description show:
//   |base_url|, |base_parsed|  - the base URL (8-bit) and its parsed form.
//   |base_is_file|             - flag saying whether the base is a file: URL.
//   |relative_url|             - the relative input; char in the first
//                                overload, base::char16 in the second.
//   |relative_component|       - the relevant portion of |relative_url|, as
//                                computed by IsRelativeURL.
//   |query_converter|          - charset converter used for the query.
//   |output|, |out_parsed|     - receive the new canonical URL and its parsed
//                                form.
URL_EXPORT bool ResolveRelativeURL(
    const char* base_url,
    const url_parse::Parsed& base_parsed,
    bool base_is_file,
    const char* relative_url,
    const url_parse::Component& relative_component,
    CharsetConverter* query_converter,
    CanonOutput* output,
    url_parse::Parsed* out_parsed);
URL_EXPORT bool ResolveRelativeURL(
    const char* base_url,
    const url_parse::Parsed& base_parsed,
    bool base_is_file,
    const base::char16* relative_url,
    const url_parse::Component& relative_component,
    CharsetConverter* query_converter,
    CanonOutput* output,
    url_parse::Parsed* out_parsed);

}  // namespace url_canon

#endif  // URL_URL_CANON_H_