1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef URL_URL_UTIL_H_ 6#define URL_URL_UTIL_H_ 7 8#include <string> 9 10#include "base/strings/string16.h" 11#include "url/url_canon.h" 12#include "url/url_export.h" 13#include "url/url_parse.h" 14 15namespace url_util { 16 17// Init ------------------------------------------------------------------------ 18 19// Initialization is NOT required, it will be implicitly initialized when first 20// used. However, this implicit initialization is NOT threadsafe. If you are 21// using this library in a threaded environment and don't have a consistent 22// "first call" (an example might be calling "AddStandardScheme" with your 23// special application-specific schemes) then you will want to call initialize 24// before spawning any threads. 25// 26// It is OK to call this function more than once, subsequent calls will simply 27// "noop", unless Shutdown() was called in the mean time. This will also be a 28// "noop" if other calls to the library have forced an initialization 29// beforehand. 30URL_EXPORT void Initialize(); 31 32// Cleanup is not required, except some strings may leak. For most user 33// applications, this is fine. If you're using it in a library that may get 34// loaded and unloaded, you'll want to unload to properly clean up your 35// library. 36URL_EXPORT void Shutdown(); 37 38// Schemes -------------------------------------------------------------------- 39 40// Adds an application-defined scheme to the internal list of "standard" URL 41// schemes. This function is not threadsafe and can not be called concurrently 42// with any other url_util function. It will assert if the list of standard 43// schemes has been locked (see LockStandardSchemes). 44URL_EXPORT void AddStandardScheme(const char* new_scheme); 45 46// Sets a flag to prevent future calls to AddStandardScheme from succeeding. 47// 48// This is designed to help prevent errors for multithreaded applications. 49// Normal usage would be to call AddStandardScheme for your custom schemes at 50// the beginning of program initialization, and then LockStandardSchemes. This 51// prevents future callers from mistakenly calling AddStandardScheme when the 52// program is running with multiple threads, where such usage would be 53// dangerous. 54// 55// We could have had AddStandardScheme use a lock instead, but that would add 56// some platform-specific dependencies we don't otherwise have now, and is 57// overkill considering the normal usage is so simple. 58URL_EXPORT void LockStandardSchemes(); 59 60// Locates the scheme in the given string and places it into |found_scheme|, 61// which may be NULL to indicate the caller does not care about the range. 62// 63// Returns whether the given |compare| scheme matches the scheme found in the 64// input (if any). The |compare| scheme must be a valid canonical scheme or 65// the result of the comparison is undefined. 66URL_EXPORT bool FindAndCompareScheme(const char* str, 67 int str_len, 68 const char* compare, 69 url_parse::Component* found_scheme); 70URL_EXPORT bool FindAndCompareScheme(const base::char16* str, 71 int str_len, 72 const char* compare, 73 url_parse::Component* found_scheme); 74inline bool FindAndCompareScheme(const std::string& str, 75 const char* compare, 76 url_parse::Component* found_scheme) { 77 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), 78 compare, found_scheme); 79} 80inline bool FindAndCompareScheme(const base::string16& str, 81 const char* compare, 82 url_parse::Component* found_scheme) { 83 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), 84 compare, found_scheme); 85} 86 87// Returns true if the given string represents a standard URL. This means that 88// either the scheme is in the list of known standard schemes. 89URL_EXPORT bool IsStandard(const char* spec, 90 const url_parse::Component& scheme); 91URL_EXPORT bool IsStandard(const base::char16* spec, 92 const url_parse::Component& scheme); 93 94// TODO(brettw) remove this. This is a temporary compatibility hack to avoid 95// breaking the WebKit build when this version is synced via Chrome. 96inline bool IsStandard(const char* spec, int spec_len, 97 const url_parse::Component& scheme) { 98 return IsStandard(spec, scheme); 99} 100 101// URL library wrappers ------------------------------------------------------- 102 103// Parses the given spec according to the extracted scheme type. Normal users 104// should use the URL object, although this may be useful if performance is 105// critical and you don't want to do the heap allocation for the std::string. 106// 107// As with the url_canon::Canonicalize* functions, the charset converter can 108// be NULL to use UTF-8 (it will be faster in this case). 109// 110// Returns true if a valid URL was produced, false if not. On failure, the 111// output and parsed structures will still be filled and will be consistent, 112// but they will not represent a loadable URL. 113URL_EXPORT bool Canonicalize(const char* spec, 114 int spec_len, 115 url_canon::CharsetConverter* charset_converter, 116 url_canon::CanonOutput* output, 117 url_parse::Parsed* output_parsed); 118URL_EXPORT bool Canonicalize(const base::char16* spec, 119 int spec_len, 120 url_canon::CharsetConverter* charset_converter, 121 url_canon::CanonOutput* output, 122 url_parse::Parsed* output_parsed); 123 124// Resolves a potentially relative URL relative to the given parsed base URL. 125// The base MUST be valid. The resulting canonical URL and parsed information 126// will be placed in to the given out variables. 127// 128// The relative need not be relative. If we discover that it's absolute, this 129// will produce a canonical version of that URL. See Canonicalize() for more 130// about the charset_converter. 131// 132// Returns true if the output is valid, false if the input could not produce 133// a valid URL. 134URL_EXPORT bool ResolveRelative(const char* base_spec, 135 int base_spec_len, 136 const url_parse::Parsed& base_parsed, 137 const char* relative, 138 int relative_length, 139 url_canon::CharsetConverter* charset_converter, 140 url_canon::CanonOutput* output, 141 url_parse::Parsed* output_parsed); 142URL_EXPORT bool ResolveRelative(const char* base_spec, 143 int base_spec_len, 144 const url_parse::Parsed& base_parsed, 145 const base::char16* relative, 146 int relative_length, 147 url_canon::CharsetConverter* charset_converter, 148 url_canon::CanonOutput* output, 149 url_parse::Parsed* output_parsed); 150 151// Replaces components in the given VALID input url. The new canonical URL info 152// is written to output and out_parsed. 153// 154// Returns true if the resulting URL is valid. 155URL_EXPORT bool ReplaceComponents( 156 const char* spec, 157 int spec_len, 158 const url_parse::Parsed& parsed, 159 const url_canon::Replacements<char>& replacements, 160 url_canon::CharsetConverter* charset_converter, 161 url_canon::CanonOutput* output, 162 url_parse::Parsed* out_parsed); 163URL_EXPORT bool ReplaceComponents( 164 const char* spec, 165 int spec_len, 166 const url_parse::Parsed& parsed, 167 const url_canon::Replacements<base::char16>& replacements, 168 url_canon::CharsetConverter* charset_converter, 169 url_canon::CanonOutput* output, 170 url_parse::Parsed* out_parsed); 171 172// String helper functions ---------------------------------------------------- 173 174// Compare the lower-case form of the given string against the given ASCII 175// string. This is useful for doing checking if an input string matches some 176// token, and it is optimized to avoid intermediate string copies. 177// 178// The versions of this function that don't take a b_end assume that the b 179// string is NULL terminated. 180URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 181 const char* a_end, 182 const char* b); 183URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 184 const char* a_end, 185 const char* b_begin, 186 const char* b_end); 187URL_EXPORT bool LowerCaseEqualsASCII(const base::char16* a_begin, 188 const base::char16* a_end, 189 const char* b); 190 191// Unescapes the given string using URL escaping rules. 192URL_EXPORT void DecodeURLEscapeSequences(const char* input, int length, 193 url_canon::CanonOutputW* output); 194 195// Escapes the given string as defined by the JS method encodeURIComponent. See 196// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent 197URL_EXPORT void EncodeURIComponent(const char* input, int length, 198 url_canon::CanonOutput* output); 199 200 201} // namespace url_util 202 203#endif // URL_URL_UTIL_H_ 204