1/*
2 * Copyright (C) 2005, 2007, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1.  Redistributions of source code must retain the above copyright
10 *     notice, this list of conditions and the following disclaimer.
11 * 2.  Redistributions in binary form must reproduce the above copyright
12 *     notice, this list of conditions and the following disclaimer in the
13 *     documentation and/or other materials provided with the distribution.
14 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
15 *     its contributors may be used to endorse or promote products derived
16 *     from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#import "WebNSURLExtras.h"
31
32#import "WebKitNSStringExtras.h"
33#import "WebLocalizableStrings.h"
34#import "WebNSDataExtras.h"
35#import "WebNSObjectExtras.h"
36#import "WebSystemInterface.h"
37#import <Foundation/NSURLRequest.h>
38#import <WebCore/KURL.h>
39#import <WebCore/LoaderNSURLExtras.h>
40#import <WebKitSystemInterface.h>
41#import <wtf/Assertions.h>
42#import <unicode/uchar.h>
43#import <unicode/uidna.h>
44#import <unicode/uscript.h>
45
46using namespace WebCore;
47using namespace WTF;
48
// Callback invoked with each host name range found in a URL string.
// `context` is passed through unchanged from the caller of the applier function.
typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);

// Needs to be big enough to hold an IDN-encoded name.
// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
#define HOST_NAME_BUFFER_LENGTH 2048

// Initial buffer size, in bytes, used when fetching a URL's bytes with CFURLGetBytes.
// Longer URLs fall back to an exactly-sized heap allocation.
#define URL_BYTES_BUFFER_LENGTH 2048

// NOTE(review): neither of these is referenced in the portion of the file reviewed here;
// presumably they back the IDN script whitelist used by the host name encoding helpers.
static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
// Sized as one bit per ICU script code, rounded up to whole 32-bit words.
static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
59
// Returns YES when charCode should be treated as an unsafe, lookalike character.
//
// A character is considered unsafe when it is non-printable, when it is whitespace that
// isn't already converted to a space by ICU, or when it is a default-ignorable code point.
//
// We also considered the characters in Mozilla's blacklist
// (http://kb.mozillazine.org/Network.IDN.blacklist_chars), and included all of these
// characters that ICU can encode; they are listed explicitly below.
//
// FIXME: Move this code down into WebCore so it can be shared with other platforms.
static inline BOOL isLookalikeCharacter(int charCode)
{
    // Property-based checks, evaluated in the same order as a short-circuit "or".
    if (!u_isprint(charCode))
        return YES;
    if (u_isUWhiteSpace(charCode))
        return YES;
    if (u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
        return YES;

    // Explicitly blacklisted code points.
    switch (charCode) {
        case 0x00ED: // LATIN SMALL LETTER I WITH ACUTE
        case 0x01C3: // LATIN LETTER RETROFLEX CLICK
        case 0x0251: // LATIN SMALL LETTER ALPHA
        case 0x0261: // LATIN SMALL LETTER SCRIPT G
        case 0x0337: // COMBINING SHORT SOLIDUS OVERLAY
        case 0x0338: // COMBINING LONG SOLIDUS OVERLAY
        case 0x05B4: // HEBREW POINT HIRIQ
        case 0x05BC: // HEBREW POINT DAGESH OR MAPIQ
        case 0x05C3: // HEBREW PUNCTUATION SOF PASUQ
        case 0x05F4: // HEBREW PUNCTUATION GERSHAYIM
        case 0x0660: // ARABIC INDIC DIGIT ZERO
        case 0x06D4: // ARABIC FULL STOP
        case 0x06F0: // EXTENDED ARABIC INDIC DIGIT ZERO
        case 0x2027: // HYPHENATION POINT
        case 0x2039: // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        case 0x203A: // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        case 0x2044: // FRACTION SLASH
        case 0x2215: // DIVISION SLASH
        case 0x2216: // SET MINUS
        case 0x233F: // APL FUNCTIONAL SYMBOL SLASH BAR
        case 0x23AE: // INTEGRAL EXTENSION
        case 0x244A: // OCR DOUBLE BACKSLASH
        case 0x2571: // BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
        case 0x2572: // BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
        case 0x29F8: // BIG SOLIDUS
        case 0x29F6: // SOLIDUS WITH OVERBAR
        case 0x2AFB: // TRIPLE SOLIDUS BINARY RELATION
        case 0x2AFD: // DOUBLE SOLIDUS OPERATOR
        case 0x3008: // LEFT ANGLE BRACKET
        case 0x3014: // LEFT TORTOISE SHELL BRACKET
        case 0x3015: // RIGHT TORTOISE SHELL BRACKET
        case 0x3033: // VERTICAL KANA REPEAT MARK UPPER HALF
        case 0x3035: // VERTICAL KANA REPEAT MARK LOWER HALF
        case 0x321D: // PARENTHESIZED KOREAN CHARACTER OJEON
        case 0x321E: // PARENTHESIZED KOREAN CHARACTER O HU
        case 0x33DF: // SQUARE A OVER M
        case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
        case 0xFE15: // PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK
        case 0xFE3F: // PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
        case 0xFE5D: // SMALL LEFT TORTOISE SHELL BRACKET
        case 0xFE5E: // SMALL RIGHT TORTOISE SHELL BRACKET
            return YES;
    }

    return NO;
}
121
// Returns the uppercase ASCII hexadecimal digit ('0'-'9', 'A'-'F') for the nibble value i.
// i must be in the range 0..15; any other value logs an error and yields '0'.
static char hexDigit(int i)
{
    // 16 is not a single hex digit: letting it through would produce 'G'.
    if (i < 0 || i > 15) {
        LOG_ERROR("illegal hex digit");
        return '0';
    }
    if (i >= 10)
        return static_cast<char>(i - 10 + 'A');
    return static_cast<char>(i + '0');
}
137
// Returns YES when c is an ASCII hexadecimal digit, in either letter case.
static BOOL isHexDigit(char c)
{
    if ('0' <= c && c <= '9')
        return YES;
    if ('A' <= c && c <= 'F')
        return YES;
    if ('a' <= c && c <= 'f')
        return YES;
    return NO;
}
142
// Returns the numeric value (0..15) of the ASCII hexadecimal digit c.
// Logs an error and returns 0 when c is not a hexadecimal digit.
static int hexDigitValue(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');

    LOG_ERROR("illegal hex digit");
    return 0;
}
157
// Scans a mailto: URL string and calls f(string, range, context) once for each host name found.
// Scanning stops at the first '?' outside a quoted string, at the end of the string, or at an
// unterminated quoted string.
static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
    // Skip quoted strings so that characters in them don't confuse us.
    // When we find a '?' character, we are past the part of the URL that contains host names.

    // The character sets are created lazily on first use and retained for the life of the process.
    static NSCharacterSet *hostNameOrStringStartCharacters;
    if (hostNameOrStringStartCharacters == nil) {
        hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
        CFRetain(hostNameOrStringStartCharacters);
    }
    static NSCharacterSet *hostNameEndCharacters;
    if (hostNameEndCharacters == nil) {
        hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
        CFRetain(hostNameEndCharacters);
    }
    static NSCharacterSet *quotedStringCharacters;
    if (quotedStringCharacters == nil) {
        quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
        CFRetain(quotedStringCharacters);
    }

    unsigned stringLength = [string length];
    // The part of the string that has not been scanned yet.
    NSRange remaining = NSMakeRange(0, stringLength);

    while (1) {
        // Find start of host name or of quoted string.
        NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
        if (hostNameOrStringStart.location == NSNotFound) {
            return;
        }
        unichar c = [string characterAtIndex:hostNameOrStringStart.location];
        // Continue scanning just past the character that was found.
        remaining.location = NSMaxRange(hostNameOrStringStart);
        remaining.length = stringLength - remaining.location;

        if (c == '?') {
            return;
        }

        if (c == '@') {
            // Find end of host name.
            unsigned hostNameStart = remaining.location;
            NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
            BOOL done;
            if (hostNameEnd.location == NSNotFound) {
                // No terminator: the host name runs to the end of the string.
                hostNameEnd.location = stringLength;
                done = YES;
            } else {
                // Resume at the terminator itself, so a '?' terminator ends the scan on the next pass.
                remaining.location = hostNameEnd.location;
                remaining.length = stringLength - remaining.location;
                done = NO;
            }

            // Process host name range.
            f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);

            if (done) {
                return;
            }
        } else {
            // Skip quoted string.
            ASSERT(c == '"');
            while (1) {
                NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
                if (escapedCharacterOrStringEnd.location == NSNotFound) {
                    // Unterminated quoted string: nothing more to find.
                    return;
                }
                c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
                remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
                remaining.length = stringLength - remaining.location;

                // If we are at the end of the quoted string, then break from the string loop back to the host name loop.
                if (c == '"') {
                    break;
                }

                // Skip escaped character.
                ASSERT(c == '\\');
                if (remaining.length == 0) {
                    // The backslash was the last character of the string.
                    return;
                }
                remaining.location += 1;
                remaining.length -= 1;
            }
        }
    }
}
245
// Finds the host name(s) in a URL string and calls f(string, range, context) for each one.
//
// mailto: URLs are handed to applyHostNameFunctionToMailToURLString. For any other string,
// f is called at most once, and only when the string contains "://" preceded exclusively by
// scheme characters.
static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // Too bad we can't use any real URL-parsing code to do this, but we have to do it
    // before doing all the %-escaping, and this is the only code we have that parses
    // mailto URLs anyway.

    // Maybe we should implement this using a character buffer instead?

    if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
        applyHostNameFunctionToMailToURLString(string, f, context);
        return;
    }

    // Find the host name in a hierarchical URL.
    // It comes after a "://" sequence, with scheme characters preceding.
    // It ends with the end of the string or a ":", "/", "?", or "#".
    // If there is a "@" character, the host part is just the part after the "@".
    NSRange schemeSeparator = [string rangeOfString:@"://"];
    if (schemeSeparator.location == NSNotFound)
        return;

    // Everything before the "://" must be a valid scheme character.
    static NSCharacterSet *invalidSchemeCharacters;
    if (!invalidSchemeCharacters) {
        NSCharacterSet *schemeCharacters = [NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."];
        invalidSchemeCharacters = [schemeCharacters invertedSet];
        CFRetain(invalidSchemeCharacters);
    }
    NSRange schemeRange = NSMakeRange(0, schemeSeparator.location);
    NSRange firstInvalidCharacter = [string rangeOfCharacterFromSet:invalidSchemeCharacters options:0 range:schemeRange];
    if (firstInvalidCharacter.location != NSNotFound)
        return;

    unsigned length = [string length];

    static NSCharacterSet *authorityTerminators;
    if (!authorityTerminators) {
        authorityTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
        CFRetain(authorityTerminators);
    }

    // The authority begins right after the separator.
    unsigned authorityBegin = NSMaxRange(schemeSeparator);

    // The host name ends at the first terminator, or at the end of the string.
    NSRange terminator = [string rangeOfCharacterFromSet:authorityTerminators options:0 range:NSMakeRange(authorityBegin, length - authorityBegin)];
    unsigned hostEnd;
    if (terminator.location == NSNotFound)
        hostEnd = length;
    else
        hostEnd = terminator.location;

    // The host name begins after the "@" if there is one, otherwise at the start of the authority.
    NSRange atSign = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityBegin, hostEnd - authorityBegin)];
    unsigned hostBegin;
    if (atSign.location == NSNotFound)
        hostBegin = authorityBegin;
    else
        hostBegin = NSMaxRange(atSign);

    f(string, NSMakeRange(hostBegin, hostEnd - hostBegin), context);
}
299
300@implementation NSURL (WebNSURLExtras)
301
// Appends range to the array that context points at when the host name in that range needs
// encoding (encode == YES) or decoding (encode == NO). context is an NSMutableArray **; the
// array is created on first use and is owned by whoever supplied the pointer.
static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
{
    BOOL needsMapping;
    if (encode)
        needsMapping = [string _web_hostNameNeedsEncodingWithRange:range];
    else
        needsMapping = [string _web_hostNameNeedsDecodingWithRange:range];
    if (!needsMapping)
        return;

    NSMutableArray **ranges = static_cast<NSMutableArray **>(context);
    if (!*ranges)
        *ranges = [[NSMutableArray alloc] init];

    NSValue *boxedRange = [NSValue valueWithRange:range];
    [*ranges addObject:boxedRange];
}
318
// StringRangeApplierFunction adapter: collects the range if its host name needs encoding.
static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
{
    const BOOL encode = YES;
    collectRangesThatNeedMapping(string, range, context, encode);
}
323
// StringRangeApplierFunction adapter: collects the range if its host name needs decoding.
static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
{
    const BOOL encode = NO;
    collectRangesThatNeedMapping(string, range, context, encode);
}
328
// Returns string with each host name that needs it encoded (encode == YES) or decoded
// (encode == NO). The original string object is returned when nothing has to change.
static NSString *mapHostNames(NSString *string, BOOL encode)
{
    // Generally, we want to optimize for the case where there is one host name that does not need mapping.

    // A string that is entirely ASCII is returned as-is when encoding.
    if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
        return string;

    // Collect only the ranges that actually need mapping; the array is created on demand.
    NSMutableArray *rangesToMap = nil;
    StringRangeApplierFunction collector;
    if (encode)
        collector = collectRangesThatNeedEncoding;
    else
        collector = collectRangesThatNeedDecoding;
    applyHostNameFunctionToURLString(string, collector, &rangesToMap);
    if (!rangesToMap)
        return string;

    // Replace from the last range to the first so that earlier ranges stay valid
    // while the copy changes length.
    NSMutableString *mapped = [string mutableCopy];
    for (unsigned remaining = [rangesToMap count]; remaining > 0; --remaining) {
        NSRange hostNameRange = [[rangesToMap objectAtIndex:remaining - 1] rangeValue];
        NSString *replacement;
        if (encode)
            replacement = [string _web_encodeHostNameWithRange:hostNameRange];
        else
            replacement = [string _web_decodeHostNameWithRange:hostNameRange];
        [mapped replaceCharactersInRange:hostNameRange withString:replacement];
    }
    [rangesToMap release];
    return [mapped autorelease];
}
358
// Creates a URL from text the user typed, resolved against URL (which may be nil).
// The text is trimmed, its host names are encoded, and every UTF-8 byte that is <= 0x20
// or >= 0x7f is %-escaped before the bytes are handed to _web_URLWithData:relativeToURL:.
// Returns nil for a nil string.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
{
    if (!string)
        return nil;

    NSString *trimmed = [string _webkit_stringByTrimmingWhitespace];
    NSString *hostEncoded = mapHostNames(trimmed, YES);

    NSData *userTypedData = [hostEncoded dataUsingEncoding:NSUTF8StringEncoding];
    ASSERT(userTypedData);

    const UInt8 *source = static_cast<const UInt8 *>([userTypedData bytes]);
    int sourceLength = [userTypedData length];
    if (!sourceLength)
        return [NSURL URLWithString:@""];

    // Three output bytes per input byte is large enough to %-escape every character.
    char *escaped = static_cast<char *>(malloc(sourceLength * 3));
    int escapedLength = 0;
    for (int index = 0; index < sourceLength; ++index) {
        UInt8 byte = source[index];
        bool passesThrough = byte > 0x20 && byte < 0x7f;
        if (passesThrough) {
            escaped[escapedLength++] = byte;
            continue;
        }
        escaped[escapedLength++] = '%';
        escaped[escapedLength++] = hexDigit(byte >> 4);
        escaped[escapedLength++] = hexDigit(byte & 0xf);
    }

    // The NSData adopts the malloc'd buffer.
    NSData *escapedData = [NSData dataWithBytesNoCopy:escaped length:escapedLength];
    return [self _web_URLWithData:escapedData relativeToURL:URL];
}
396
// Convenience form of _web_URLWithUserTypedString:relativeToURL: with no base URL.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
{
    NSURL *noBaseURL = nil;
    return [self _web_URLWithUserTypedString:string relativeToURL:noBaseURL];
}
401
// Convenience form of _web_URLWithDataAsString:relativeToURL: with no base URL.
// Returns nil for a nil string.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string
{
    if (!string)
        return nil;

    NSURL *noBaseURL = nil;
    return [self _web_URLWithDataAsString:string relativeToURL:noBaseURL];
}
409
// Creates a URL from a string whose characters each stand for one URL byte: the string is
// trimmed, converted to bytes as ISO Latin 1, and resolved against baseURL.
// Returns nil for a nil string.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
{
    if (!string)
        return nil;

    NSString *trimmed = [string _webkit_stringByTrimmingWhitespace];
    NSData *latin1Bytes = [trimmed dataUsingEncoding:NSISOLatin1StringEncoding];
    return [self _web_URLWithData:latin1Bytes relativeToURL:baseURL];
}
419
// Convenience form of _web_URLWithData:relativeToURL: with no base URL.
+ (NSURL *)_web_URLWithData:(NSData *)data
{
    NSURL *noBaseURL = nil;
    return [NSURL _web_URLWithData:data relativeToURL:noBaseURL];
}
424
// Creates a URL from raw URL bytes, resolved against baseURL (which may be nil).
// Returns nil for nil data and an empty URL for zero-length data.
+ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
{
    if (!data)
        return nil;

    size_t byteCount = [data length];
    if (!byteCount)
        return [NSURL URLWithString:@""];

    // work around <rdar://4470771>: CFURLCreateAbsoluteURLWithBytes(.., TRUE) doesn't remove non-path components.
    CFURLRef strippedBase = (CFURLRef)[baseURL _webkit_URLByRemovingResourceSpecifier];

    const UInt8 *rawBytes = static_cast<const UInt8*>([data bytes]);

    // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components
    // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which
    // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back
    // onto using ISO Latin 1 in those cases.
    NSURL *url = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, rawBytes, byteCount, kCFStringEncodingUTF8, strippedBase, YES));
    if (url)
        return url;

    return WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, rawBytes, byteCount, kCFStringEncodingISOLatin1, strippedBase, YES));
}
449
// Returns the bytes of this URL as reported by CFURLGetBytes. For a URL with a base URL,
// the bytes are first re-resolved against that base and the result's bytes are returned.
- (NSData *)_web_originalData
{
    CFURLRef cfURL = (CFURLRef)self;

    // Try a fixed-size buffer first; grow it to the exact size when the URL doesn't fit.
    UInt8 *bytes = (UInt8 *)malloc(URL_BYTES_BUFFER_LENGTH);
    CFIndex byteCount = CFURLGetBytes(cfURL, bytes, URL_BYTES_BUFFER_LENGTH);
    if (byteCount == -1) {
        CFIndex requiredSize = CFURLGetBytes(cfURL, NULL, 0);
        bytes = (UInt8 *)realloc(bytes, requiredSize);
        byteCount = CFURLGetBytes(cfURL, bytes, requiredSize);
        ASSERT(byteCount == requiredSize);
    }

    // The NSData adopts the malloc'd buffer and frees it when done.
    NSData *ownBytes = [NSData dataWithBytesNoCopy:bytes length:byteCount freeWhenDone:YES];

    NSURL *baseURL = (NSURL *)CFURLGetBaseURL(cfURL);
    if (!baseURL)
        return ownBytes;

    NSURL *resolved = [NSURL _web_URLWithData:ownBytes relativeToURL:baseURL];
    return [resolved _web_originalData];
}
469
// Returns the URL's original bytes as a string, one character per byte (ISO Latin 1).
- (NSString *)_web_originalDataAsString
{
    NSData *originalBytes = [self _web_originalData];
    NSString *decoded = [[NSString alloc] initWithData:originalBytes encoding:NSISOLatin1StringEncoding];
    return [decoded autorelease];
}
474
// Returns a new string in which every code point that isLookalikeCharacter() rejects is
// replaced by the %-escaped form of its UTF-8 bytes; all other code points are copied through.
// The result comes from CFStringCreateWithCharacters, so the caller is responsible for releasing it.
static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)
{
    // Copy the string's UTF-16 code units into a local buffer.
    CFIndex length = CFStringGetLength(string);
    Vector<UChar, 2048> sourceBuffer(length);
    CFStringGetCharacters(string, CFRangeMake(0, length), sourceBuffer.data());

    Vector<UChar, 2048> outBuffer;

    CFIndex i = 0;
    while (i < length) {
        // U16_NEXT reads one code point (combining a surrogate pair when present) and advances i.
        UChar32 c;
        U16_NEXT(sourceBuffer, i, length, c)

        if (isLookalikeCharacter(c)) {
            // Encode the code point as UTF-8 (at most 4 bytes); offset ends up as the byte count.
            uint8_t utf8Buffer[4];
            CFIndex offset = 0;
            UBool failure = false;
            U8_APPEND(utf8Buffer, offset, 4, c, failure)
            ASSERT(!failure);

            // Emit each UTF-8 byte as "%XX".
            for (CFIndex j = 0; j < offset; ++j) {
                outBuffer.append('%');
                outBuffer.append(hexDigit(utf8Buffer[j] >> 4));
                outBuffer.append(hexDigit(utf8Buffer[j] & 0xf));
            }
        } else {
            // Re-encode the code point as UTF-16 (one or two code units) and copy it through.
            UChar utf16Buffer[2];
            CFIndex offset = 0;
            UBool failure = false;
            U16_APPEND(utf16Buffer, offset, 2, c, failure)
            ASSERT(!failure);
            for (CFIndex j = 0; j < offset; ++j)
                outBuffer.append(utf16Buffer[j]);
        }
    }

    return CFStringCreateWithCharacters(NULL, outBuffer.data(), outBuffer.size());
}
513
// Returns a form of this URL intended for display to the user.
//
// Starting from the URL's original bytes:
//   1. %XX escapes whose value is above 0x7f are unescaped; escapes of ASCII bytes are kept.
//   2. If the result is not valid UTF-8, every byte above 0x7f is %-escaped again.
//   3. Host names are passed through mapHostNames(), the string is canonically precomposed,
//      and characters rejected by isLookalikeCharacter() are %-escaped.
- (NSString *)_web_userVisibleString
{
    NSData *data = [self _web_originalData];
    const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
    int length = [data length];

    bool needsHostNameDecoding = false;

    // Large enough to %-escape every character (needed by the fallback pass below), plus the terminator.
    int bufferLength = (length * 3) + 1;
    char *after = static_cast<char *>(malloc(bufferLength));
    char *q = after;
    for (int i = 0; i < length; i++) {
        unsigned char c = before[i];
        // unescape escape sequences that indicate bytes greater than 0x7f
        if (c == '%' && i + 2 < length && isHexDigit(before[i + 1]) && isHexDigit(before[i + 2])) {
            unsigned char u = (hexDigitValue(before[i + 1]) << 4) | hexDigitValue(before[i + 2]);
            if (u > 0x7f) {
                // unescape
                *q++ = u;
            } else {
                // do not unescape
                *q++ = before[i];
                *q++ = before[i + 1];
                *q++ = before[i + 2];
            }
            i += 2;
        } else {
            *q++ = c;

            // Check for "xn--" in an efficient, non-case-sensitive, way.
            // The bound must be on the number of bytes written so far, not on the input index:
            // unescaping shrinks the output, so fewer than four bytes may have been written
            // even when i >= 3, and q[-4] would then read before the start of the buffer.
            if (c == '-' && !needsHostNameDecoding && q - after >= 4 && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
                needsHostNameDecoding = true;
        }
    }
    *q = '\0';

    // Check string to see if it can be converted to display using UTF-8
    NSString *result = [NSString stringWithUTF8String:after];
    if (!result) {
        // Could not convert to UTF-8.
        // Convert characters greater than 0x7f to escape sequences.
        // Shift current string to the end of the buffer
        // then we will copy back bytes to the start of the buffer
        // as we convert.
        int afterLength = q - after;
        char *source = after + bufferLength - afterLength - 1;
        memmove(source, after, afterLength + 1); // copies trailing '\0'
        char *destination = after;
        while (*source) {
            unsigned char c = *source;
            if (c > 0x7f) {
                *destination++ = '%';
                *destination++ = hexDigit(c >> 4);
                *destination++ = hexDigit(c & 0xf);
            } else {
                *destination++ = *source;
            }
            source++;
        }
        *destination = '\0';
        result = [NSString stringWithUTF8String:after];
    }

    free(after);

    result = mapHostNames(result, !needsHostNameDecoding);
    result = [result precomposedStringWithCanonicalMapping];
    // The created string is handed to the autorelease pool.
    return WebCFAutorelease(createStringWithEscapedUnsafeCharacters((CFStringRef)result));
}
585
// Returns YES when this URL has no bytes. A URL with a base URL is measured through
// _web_originalData; one without a base is measured directly with CFURLGetBytes.
- (BOOL)_web_isEmpty
{
    CFURLRef cfURL = (CFURLRef)self;
    if (CFURLGetBaseURL(cfURL)) {
        NSUInteger resolvedLength = [[self _web_originalData] length];
        return resolvedLength == 0;
    }

    CFIndex byteCount = CFURLGetBytes(cfURL, NULL, 0);
    return byteCount == 0;
}
592
// Returns the URL's original bytes as a NUL-terminated C string. The pointer refers to the
// storage of an autoreleased NSMutableData created here.
- (const char *)_web_URLCString
{
    NSMutableData *terminated = [NSMutableData data];
    [terminated appendData:[self _web_originalData]];

    const char terminator = '\0';
    [terminated appendBytes:&terminator length:sizeof(terminator)];

    return static_cast<const char *>([terminated bytes]);
}
600
// Returns the canonical form of this URL as computed by the class that
// WKNSURLProtocolClassForRequest returns for it, or self when no class is returned.
- (NSURL *)_webkit_canonicalize
{
    NSURLRequest *request = [[NSURLRequest alloc] initWithURL:self];
    NSURL *canonicalURL = self;

    Class protocolClass = WKNSURLProtocolClassForRequest(request);
    if (protocolClass) {
        // This applies NSURL's concept of canonicalization, but not KURL's concept. It would
        // make sense to apply both, but when we tried that it caused a performance degradation
        // (see 5315926). It might make sense to apply only the KURL concept and not the NSURL
        // concept, but it's too risky to make that change for WebKit 3.0.
        NSURLRequest *canonicalRequest = [protocolClass canonicalRequestForRequest:request];
        // Retain/autorelease the URL before the original request is released below.
        canonicalURL = [[[canonicalRequest URL] retain] autorelease];
    }

    [request release];
    return canonicalURL;
}
621
// Returns a URL made from the bytes of this URL up to, but not including, the character
// that precedes the given component. Returns self when the component is absent or when
// no URL can be created from the truncated bytes.
- (NSURL *)_web_URLByTruncatingOneCharacterBeforeComponent:(CFURLComponentType)component
{
    CFURLRef cfURL = (CFURLRef)self;
    CFRange componentRange = CFURLGetByteRangeForComponent(cfURL, component, NULL);
    if (componentRange.location == kCFNotFound)
        return self;

    // Use the stack buffer when the URL fits; otherwise allocate exactly what is needed.
    UInt8 stackBytes[2048];
    UInt8 *bytes = stackBytes;
    CFIndex byteCount = CFURLGetBytes(cfURL, stackBytes, 2048);
    if (byteCount == -1) {
        byteCount = CFURLGetBytes(cfURL, NULL, 0);
        bytes = static_cast<UInt8*>(malloc(byteCount));
        CFURLGetBytes(cfURL, bytes, byteCount);
    }

    // Try UTF-8 first, then fall back to ISO Latin 1.
    CFIndex truncatedLength = componentRange.location - 1;
    NSURL *truncated = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, bytes, truncatedLength, kCFStringEncodingUTF8, NULL));
    if (!truncated)
        truncated = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, bytes, truncatedLength, kCFStringEncodingISOLatin1, NULL));

    if (bytes != stackBytes)
        free(bytes);

    if (!truncated)
        return self;
    return [truncated autorelease];
}
644
// Returns this URL truncated just before the character that precedes its fragment.
- (NSURL *)_webkit_URLByRemovingFragment
{
    CFURLComponentType fragment = kCFURLComponentFragment;
    return [self _web_URLByTruncatingOneCharacterBeforeComponent:fragment];
}
649
// Returns this URL truncated just before the character that precedes its resource specifier.
- (NSURL *)_webkit_URLByRemovingResourceSpecifier
{
    CFURLComponentType resourceSpecifier = kCFURLComponentResourceSpecifier;
    return [self _web_URLByTruncatingOneCharacterBeforeComponent:resourceSpecifier];
}
654
// Returns a URL made from this URL's bytes with the given component, and the one character
// that follows it, removed. Returns self when the component is absent, when its range lies
// beyond the URL's bytes, or when no URL can be created from the edited bytes.
- (NSURL *)_web_URLByRemovingComponentAndSubsequentCharacter:(CFURLComponentType)component
{
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, component, 0);
    if (range.location == kCFNotFound)
        return self;

    // Remove one subsequent character.
    ++range.length;

    // Use the stack buffer when the URL fits; otherwise allocate exactly what is needed.
    UInt8* urlBytes;
    UInt8 buffer[2048];
    CFIndex numBytes = CFURLGetBytes((CFURLRef)self, buffer, 2048);
    if (numBytes == -1) {
        numBytes = CFURLGetBytes((CFURLRef)self, NULL, 0);
        urlBytes = static_cast<UInt8*>(malloc(numBytes));
        CFURLGetBytes((CFURLRef)self, urlBytes, numBytes);
    } else
        urlBytes = buffer;

    NSURL *result = nil;
    if (numBytes >= range.location) {
        // Clamp the removed range to the end of the URL.
        if (numBytes < range.location + range.length)
            range.length = numBytes - range.location;

        // Slide the tail down over the removed range. Only the bytes that follow the range
        // are moved; moving more would read and write past the end of the buffer.
        CFIndex tailStart = range.location + range.length;
        memmove(urlBytes + range.location, urlBytes + tailStart, numBytes - tailStart);

        // Try UTF-8 first, then fall back to ISO Latin 1.
        result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingUTF8, NULL));
        if (!result)
            result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingISOLatin1, NULL));
    }

    // Every exit path past the allocation goes through here, so the heap buffer is never leaked.
    if (urlBytes != buffer)
        free(urlBytes);

    return result ? [result autorelease] : self;
}
690
// Returns this URL with its user info component, and the character after it, removed.
- (NSURL *)_web_URLByRemovingUserInfo
{
    CFURLComponentType userInfo = kCFURLComponentUserInfo;
    return [self _web_URLByRemovingComponentAndSubsequentCharacter:userInfo];
}
695
// Applies NSString's _webkit_isJavaScriptURL to the URL's original bytes, read as a string.
- (BOOL)_webkit_isJavaScriptURL
{
    NSString *urlString = [self _web_originalDataAsString];
    return [urlString _webkit_isJavaScriptURL];
}
700
// Applies NSString's _webkit_scriptIfJavaScriptURL to this URL's absolute string.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    NSString *urlString = [self absoluteString];
    return [urlString _webkit_scriptIfJavaScriptURL];
}
705
// Applies NSString's _webkit_isFileURL to the URL's original bytes, read as a string.
- (BOOL)_webkit_isFileURL
{
    NSString *urlString = [self _web_originalDataAsString];
    return [urlString _webkit_isFileURL];
}
710
// Applies NSString's _webkit_isFTPDirectoryURL to the URL's original bytes, read as a string.
- (BOOL)_webkit_isFTPDirectoryURL
{
    NSString *urlString = [self _web_originalDataAsString];
    return [urlString _webkit_isFTPDirectoryURL];
}
715
// Returns YES for URLs that begin with "about:" (case-insensitively) and for empty URLs.
- (BOOL)_webkit_shouldLoadAsEmptyDocument
{
    NSString *urlString = [self _web_originalDataAsString];
    if ([urlString _webkit_hasCaseInsensitivePrefix:@"about:"])
        return YES;
    return [self _web_isEmpty];
}
720
// Returns a URL equal to this one with the bytes of its scheme range converted to ASCII
// lowercase. Returns self when that range is reported as not found or when nothing changes.
- (NSURL *)_web_URLWithLowercasedScheme
{
    CFURLRef cfURL = (CFURLRef)self;

    // NOTE(review): the range read here is the "including separators" out-parameter and the
    // function's return value is ignored — confirm this is intended.
    CFRange schemeRange;
    CFURLGetByteRangeForComponent(cfURL, kCFURLComponentScheme, &schemeRange);
    if (schemeRange.location == kCFNotFound)
        return self;

    // Use the stack buffer when the URL fits; otherwise allocate exactly what is needed.
    UInt8 stackBytes[URL_BYTES_BUFFER_LENGTH];
    UInt8 *bytes = stackBytes;
    CFIndex byteCount = CFURLGetBytes(cfURL, bytes, URL_BYTES_BUFFER_LENGTH);
    if (byteCount == -1) {
        CFIndex requiredSize = CFURLGetBytes(cfURL, NULL, 0);
        bytes = static_cast<UInt8 *>(malloc(requiredSize));
        byteCount = CFURLGetBytes(cfURL, bytes, requiredSize);
        ASSERT(byteCount == requiredSize);
    }

    // Lowercase in place, remembering whether any byte actually changed.
    BOOL changed = NO;
    for (int offset = 0; offset < schemeRange.length; ++offset) {
        UInt8 *slot = &bytes[schemeRange.location + offset];
        char original = *slot;
        char lowered = toASCIILower(original);
        if (original == lowered)
            continue;
        *slot = lowered;
        changed = YES;
    }

    NSURL *lowercased = self;
    if (changed)
        lowercased = (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, byteCount, kCFStringEncodingUTF8, nil, YES));

    if (bytes != stackBytes)
        free(bytes);

    return lowercased;
}
760
761
// Returns YES when the query component's range including separators is found and is exactly
// one byte long, i.e. the query is a bare "?".
-(BOOL)_web_hasQuestionMarkOnlyQueryString
{
    CFRange queryWithSeparators;
    CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &queryWithSeparators);
    if (queryWithSeparators.location == kCFNotFound)
        return NO;
    return queryWithSeparators.length == 1;
}
771
// Returns the characters that follow the scheme's ":" separator (for example the slashes
// of "//"), encoded as ISO Latin 1. Returns nil when the scheme range is not found or the
// computed range falls outside the absolute string.
-(NSData *)_web_schemeSeparatorWithoutColon
{
    CFRange schemeWithSeparators;
    CFRange scheme = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &schemeWithSeparators);
    if (schemeWithSeparators.location == kCFNotFound)
        return nil;

    NSString *absoluteString = [self absoluteString];

    // Skip the scheme and its ":"; what is left of the separator range is the part wanted.
    NSRange afterColon = NSMakeRange(scheme.location + scheme.length + 1, schemeWithSeparators.length - scheme.length - 1);
    if (afterColon.location + afterColon.length > [absoluteString length])
        return nil;

    NSString *separatorCharacters = [absoluteString substringWithRange:afterColon];
    return [separatorCharacters dataUsingEncoding:NSISOLatin1StringEncoding];
}
787
// Pseudo component type accepted by _web_dataForURLComponentType: meaning "all of the URL's bytes".
#define completeURL (CFURLComponentType)-1
789
// Returns the bytes of one URL component (or of the whole URL when componentType is
// completeURL) with every byte that is <= 0x20 or >= 0x7f %-escaped. For the query
// component, a leading '?' is added when the query is non-empty or is a bare "?".
// Returns nil when the requested component is not present.
-(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
{
    static const int URLComponentTypeBufferLength = 2048;

    // Look the component up before fetching any bytes, so that a missing component
    // returns without ever allocating (and therefore without leaking) a heap buffer.
    CFRange range = CFRangeMake(0, 0);
    if (componentType != completeURL) {
        range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
        if (range.location == kCFNotFound) {
            return nil;
        }
    }

    // Use the stack buffer when the URL fits; otherwise allocate exactly what is needed.
    UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
    UInt8 *allBytesBuffer = staticAllBytesBuffer;

    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
        allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
    }

    if (componentType == completeURL) {
        range.location = 0;
        range.length = bytesFilled;
    }

    const unsigned char *bytes = allBytesBuffer + range.location;
    NSMutableData *resultData = [NSMutableData data];
    // NOTE: add leading '?' to non-zero length query strings.
    // NOTE: retain question-mark only query strings.
    if (componentType == kCFURLComponentQuery) {
        if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
            [resultData appendBytes:"?" length:1];
        }
    }
    for (CFIndex i = 0; i < range.length; i++) {
        unsigned char c = bytes[i];
        if (c <= 0x20 || c >= 0x7f) {
            char escaped[3];
            escaped[0] = '%';
            escaped[1] = hexDigit(c >> 4);
            escaped[2] = hexDigit(c & 0xf);
            [resultData appendBytes:escaped length:3];
        }
        else {
            char b[1];
            b[0] = c;
            [resultData appendBytes:b length:1];
        }
    }

    if (staticAllBytesBuffer != allBytesBuffer) {
        free(allBytesBuffer);
    }

    return resultData;
}
850
// Returns the URL's scheme component as produced by
// _web_dataForURLComponentType:.
-(NSData *)_web_schemeData
{
    NSData *schemeBytes = [self _web_dataForURLComponentType:kCFURLComponentScheme];
    return schemeBytes;
}
855
// Returns the URL's host component as produced by
// _web_dataForURLComponentType:, except that a "localhost" host on a "file"
// URL (both compared case-insensitively) is reported as nil.
-(NSData *)_web_hostData
{
    NSData *host = [self _web_dataForURLComponentType:kCFURLComponentHost];
    NSData *scheme = [self _web_schemeData];

    if (![scheme _web_isCaseInsensitiveEqualToCString:"file"])
        return host;

    // Take off localhost for file URLs.
    if ([host _web_isCaseInsensitiveEqualToCString:"localhost"])
        return nil;
    return host;
}
866
// Returns the host data from _web_hostData decoded as a UTF-8 string.
// When there is no host data, empty data is decoded instead, so nil is never
// handed to -initWithData:encoding:.
- (NSString *)_web_hostString
{
    NSData *data = [self _web_hostData];
    if (!data) {
        data = [NSData data];
    }
    // Decode the (possibly substituted) data computed above rather than
    // fetching the host data a second time.
    return [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
}
875
// Forwards to suggestedFilenameWithMIMEType() with this URL and the given
// MIME type, and returns whatever that helper returns.
- (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
{
    NSString *suggestedName = suggestedFilenameWithMIMEType(self, MIMEType);
    return suggestedName;
}
880
881@end
882
883@implementation NSString (WebNSURLExtras)
884
// Scans the UTF-8 bytes of the string and returns NO as soon as it finds one
// of the following, YES if none is found:
//   - a byte <= 0x20, or the byte 0x7f;
//   - a %XX escape (two hex digits) whose decoded value is greater than 0x7f;
//   - the sequence "xn--", with 'x' and 'n' matched case-insensitively.
- (BOOL)_web_isUserVisibleURL
{
    // Prefer a stack buffer for the UTF-8 form; fall back to -UTF8String when
    // the conversion into the buffer does not succeed.
    char stackBuffer[1024];
    const char *bytes = stackBuffer;
    BOOL converted = CFStringGetCString((CFStringRef)self, stackBuffer, 1023, kCFStringEncodingUTF8);
    if (!converted)
        bytes = [self UTF8String];

    int byteCount = strlen(bytes);

    for (int index = 0; index < byteCount; index++) {
        unsigned char c = bytes[index];

        // Control characters, space, and delete.
        if (c <= 0x20 || c == 0x7f)
            return NO;

        BOOL startsPercentEscape = c == '%'
            && (index + 1 < byteCount && isHexDigit(bytes[index + 1]))
            && index + 2 < byteCount && isHexDigit(bytes[index + 2]);
        if (startsPercentEscape) {
            unsigned char decoded = (hexDigitValue(bytes[index + 1]) << 4) | hexDigitValue(bytes[index + 2]);
            if (decoded > 0x7f)
                return NO;
            // Step over the two hex digits that were just examined.
            index += 2;
            continue;
        }

        // Look back for "xn--" ending at this byte; OR-ing with 0x20 folds the
        // ASCII letters to lower case.
        if (c == '-' && index >= 3 && (bytes[index - 3] | 0x20) == 'x' && (bytes[index - 2] | 0x20) == 'n' && bytes[index - 1] == '-')
            return NO;
    }

    return YES;
}
928
929
// Returns whether the string starts with "javascript:", as decided by
// _webkit_hasCaseInsensitivePrefix:.
- (BOOL)_webkit_isJavaScriptURL
{
    BOOL hasJavaScriptPrefix = [self _webkit_hasCaseInsensitivePrefix:@"javascript:"];
    return hasJavaScriptPrefix;
}
934
// Returns YES when the string begins with "file:" (anchored at the start,
// compared case-insensitively).
- (BOOL)_webkit_isFileURL
{
    NSRange prefix = [self rangeOfString:@"file:" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)];
    return prefix.location != NSNotFound;
}
939
// Passes the string through decodeURLEscapeSequences() and returns the result.
- (NSString *)_webkit_stringByReplacingValidPercentEscapes
{
    NSString *decoded = decodeURLEscapeSequences(self);
    return decoded;
}
944
// For a JavaScript URL (see _webkit_isJavaScriptURL), returns everything after
// the first 11 characters -- the length of "javascript:" -- with valid percent
// escapes replaced. Returns nil for any other string.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    if ([self _webkit_isJavaScriptURL]) {
        NSString *escapedScript = [self substringFromIndex:11];
        return [escapedScript _webkit_stringByReplacingValidPercentEscapes];
    }
    return nil;
}
952
// Returns YES when the string is at least 5 characters long (the length of
// "ftp:/"), ends with '/', and has the case-insensitive prefix "ftp:".
- (BOOL)_webkit_isFTPDirectoryURL
{
    int characterCount = [self length];
    if (characterCount < 5)
        return NO;

    // Check the cheap trailing-slash condition before the prefix comparison.
    if ([self characterAtIndex:characterCount - 1] != '/')
        return NO;

    return [self _webkit_hasCaseInsensitivePrefix:@"ftp:"] ? YES : NO;
}
962
963
// Reads script names from the file at the given path and sets the matching
// bits in IDNScriptWhiteList (one bit per script code, 32 codes per word).
// The file is parsed a word at a time; '#' starts a comment that runs to the
// end of the line. Names that ICU does not map to a script code in
// [0, USCRIPT_CODE_LIMIT) are ignored.
// Returns NO when filename is nil or the file cannot be opened, YES otherwise.
static BOOL readIDNScriptWhiteListFile(NSString *filename)
{
    if (!filename) {
        return NO;
    }
    FILE *file = fopen([filename fileSystemRepresentation], "r");
    if (file == NULL) {
        return NO;
    }

    // Read a word at a time.
    // Allow comments, starting with # character to the end of the line.
    while (1) {
        // Skip a comment if present.
        int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
        if (result == EOF) {
            break;
        }

        // Read a script name if present. At most 32 characters are stored;
        // the rest of an over-long word is consumed and discarded.
        char word[33];
        result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
        if (result == EOF) {
            break;
        }
        if (result == 1) {
            // Got a word, map to script code and put it into the array.
            int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
            if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
                size_t index = script / 32;
                // Shift an unsigned 1 so that bit 31 can be set without
                // overflowing a signed int.
                uint32_t mask = 1U << (script % 32);
                IDNScriptWhiteList[index] |= mask;
            }
        }
    }
    fclose(file);
    return YES;
}
1002
// Loads the IDN script white list. Each library directory (all domains) is
// tried in order for "IDNScriptWhiteList.txt"; the first file that can be read
// wins. If none can, the copy inside the com.apple.WebKit bundle is used.
static void readIDNScriptWhiteList(void)
{
    // Read white list from library.
    NSArray *libraryDirectories = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
    int directoryCount = [libraryDirectories count];
    int directoryIndex = 0;
    while (directoryIndex < directoryCount) {
        NSString *directory = [libraryDirectories objectAtIndex:directoryIndex];
        NSString *candidate = [directory stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"];
        if (readIDNScriptWhiteListFile(candidate))
            return;
        directoryIndex++;
    }

    // Fall back on white list inside bundle.
    NSBundle *webKitBundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
    NSString *bundledPath = [webKitBundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"];
    readIDNScriptWhiteListFile(bundledPath);
}
1019
// Returns YES only if every code point in the UTF-16 buffer belongs to a
// script whose bit is set in IDNScriptWhiteList and is not rejected by
// isLookalikeCharacter(). The white list is loaded on first use via
// pthread_once. Any ICU error, or a script code outside
// [0, USCRIPT_CODE_LIMIT), makes the result NO.
static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
{
    pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);

    int32_t i = 0;
    while (i < length) {
        UChar32 c;
        // Decodes the next code point and advances i past it.
        U16_NEXT(buffer, i, length, c)
        UErrorCode error = U_ZERO_ERROR;
        UScriptCode script = uscript_getScript(c, &error);
        if (error != U_ZERO_ERROR) {
            LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
            return NO;
        }
        if (script < 0) {
            LOG_ERROR("got negative number for script code from ICU: %d", script);
            return NO;
        }
        if (script >= USCRIPT_CODE_LIMIT) {
            return NO;
        }
        size_t index = script / 32;
        // Shift an unsigned 1 so that bit 31 can be tested without
        // overflowing a signed int.
        uint32_t mask = 1U << (script % 32);
        if (!(IDNScriptWhiteList[index] & mask)) {
            return NO;
        }

        if (isLookalikeCharacter(c))
            return NO;
    }
    return YES;
}
1052
// Returns YES when the host name in buffer ends in a top level domain with
// known character rules and the label directly below it obeys those rules.
// The only such domain handled here is ".\u0440\u0444" (Cyrillic ER, EF); a
// single trailing root dot is ignored. Returns NO for every other host,
// including an empty one.
static BOOL allCharactersAllowedByTLDRules(const UChar* buffer, int32_t length)
{
    // Nothing to inspect; also keeps the trailing-dot check below in bounds.
    if (length <= 0)
        return NO;

    // Skip trailing dot for root domain.
    if (buffer[length - 1] == '.')
        --length;

    if (length > 3
        && buffer[length - 3] == '.'
        && buffer[length - 2] == 0x0440 // CYRILLIC SMALL LETTER ER
        && buffer[length - 1] == 0x0444) // CYRILLIC SMALL LETTER EF
    {
        // Rules defined by <http://www.cctld.ru/ru/docs/rulesrf.php>. This code only checks requirements that matter for presentation purposes.
        // Walk backwards from the character before ".\u0440\u0444" down to and
        // including index 0, so the first character of the host is checked too.
        for (int32_t i = length - 4; i >= 0; --i) {
            UChar ch = buffer[i];

            // Only modern Russian letters, digits and dashes are allowed.
            if ((ch >= 0x0430 && ch <= 0x044f)
                || ch == 0x0451
                || (ch >= '0' && ch <= '9')
                || ch == '-')
                continue;

            // Only check top level domain. Lower level registrars may have different rules.
            if (ch == '.')
                break;

            return NO;
        }
        return YES;
    }

    // Not a known top level domain with special rules.
    return NO;
}
1087
// Maps the host name found at `range` through ICU's IDNA conversion:
// uidna_IDNToASCII when encode is YES, uidna_IDNToUnicode otherwise.
// A nil return means no mapping is necessary (or none could be performed).
// With makeString NO, the result is nil or self, self signalling that a
// mapping is necessary. With makeString YES, the result is nil or the mapped
// string.
- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
{
    // Hosts that cannot fit the fixed-size buffers, and empty strings, are
    // left alone.
    if (range.length > HOST_NAME_BUFFER_LENGTH || [self length] == 0)
        return nil;

    // When encoding, percent escapes inside the host are replaced first; if
    // that succeeds the unescaped copy becomes the source and is used whole.
    NSString *source = self;
    if (encode) {
        NSRange percentSign = [self rangeOfString:@"%" options:NSLiteralSearch range:range];
        if (percentSign.location != NSNotFound) {
            NSString *escapedHost = [self substringWithRange:range];
            NSString *unescapedHost = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)escapedHost, CFSTR("")));
            if (unescapedHost != nil) {
                source = unescapedHost;
                range = NSMakeRange(0, [source length]);
            }
        }
    }

    UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
    UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];

    int sourceLength = range.length;
    [source getCharacters:sourceBuffer range:range];

    UErrorCode status = U_ZERO_ERROR;
    int32_t convertedLength;
    if (encode)
        convertedLength = uidna_IDNToASCII(sourceBuffer, sourceLength, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    else
        convertedLength = uidna_IDNToUnicode(sourceBuffer, sourceLength, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);

    // Anything other than a clean conversion is treated as "no mapping".
    if (status != U_ZERO_ERROR)
        return nil;

    // The conversion produced exactly what it was given.
    BOOL unchanged = convertedLength == sourceLength
        && memcmp(sourceBuffer, destinationBuffer, sourceLength * sizeof(UChar)) == 0;
    if (unchanged)
        return nil;

    // A decoded name is only accepted if the script white list or the
    // per-TLD rules allow all of its characters.
    if (!encode
        && !allCharactersInIDNScriptWhiteList(destinationBuffer, convertedLength)
        && !allCharactersAllowedByTLDRules(destinationBuffer, convertedLength))
        return nil;

    if (!makeString)
        return self;
    return [NSString stringWithCharacters:destinationBuffer length:convertedLength];
}
1130
// Returns YES when _web_mapHostNameWithRange:encode:makeString: (decoding,
// without building a string) reports a result for the host at `range`.
- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
{
    NSString *mappingMarker = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
    return mappingMarker != nil;
}
1135
// Returns YES when _web_mapHostNameWithRange:encode:makeString: (encoding,
// without building a string) reports a result for the host at `range`.
- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
{
    NSString *mappingMarker = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
    return mappingMarker != nil;
}
1140
// Returns the decoded form of the host at `range`, or nil when
// _web_mapHostNameWithRange:encode:makeString: produces no mapping.
- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
{
    NSString *decodedHost = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
    return decodedHost;
}
1145
// Returns the encoded form of the host at `range`, or nil when
// _web_mapHostNameWithRange:encode:makeString: produces no mapping.
- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
{
    NSString *encodedHost = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
    return encodedHost;
}
1150
// Treats the whole string as a host name and returns its decoded form, or the
// string itself when no mapping is produced.
- (NSString *)_web_decodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *decodedHost = [self _web_mapHostNameWithRange:wholeString encode:NO makeString:YES];
    if (decodedHost != nil)
        return decodedHost;
    return self;
}
1156
// Treats the whole string as a host name and returns its encoded form, or the
// string itself when no mapping is produced.
- (NSString *)_web_encodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *encodedHost = [self _web_mapHostNameWithRange:wholeString encode:YES makeString:YES];
    if (encodedHost != nil)
        return encodedHost;
    return self;
}
1162
// Returns the range {0, index of the first ':'} when the string has a
// non-empty prefix before its first ':' made up only of ASCII letters, digits,
// '+', '.' and '-'. Otherwise returns {NSNotFound, 0}.
-(NSRange)_webkit_rangeOfURLScheme
{
    NSRange notFound = NSMakeRange(NSNotFound, 0);

    NSRange colon = [self rangeOfString:@":"];
    if (colon.location == NSNotFound || colon.location == 0)
        return notFound;

    // Building the inverted character set is expensive (the original author
    // measured 10-15 msec, enough to swamp everything else when adding items
    // to the autocomplete DB), so it is created once and kept for later calls.
    static NSCharacterSet *nonSchemeCharacters = nil;
    if (!nonSchemeCharacters) {
        NSCharacterSet *schemeCharacters = [NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"];
        nonSchemeCharacters = [[schemeCharacters invertedSet] retain];
    }

    NSRange candidate = NSMakeRange(0, colon.location);
    NSRange offending = [self rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:candidate];
    if (offending.location != NSNotFound)
        return notFound;
    return candidate;
}
1184
// Returns YES when the string, after trimming whitespace, starts with a URL
// scheme as recognized by _webkit_rangeOfURLScheme.
-(BOOL)_webkit_looksLikeAbsoluteURL
{
    // Trim whitespace because _web_URLWithString allows whitespace.
    NSString *trimmed = [self _webkit_stringByTrimmingWhitespace];
    NSRange scheme = [trimmed _webkit_rangeOfURLScheme];
    return scheme.location != NSNotFound;
}
1190
// Returns everything after the first '#' in the string, or nil when the
// string contains no '#'.
- (NSString *)_webkit_URLFragment
{
    NSRange hashMark = [self rangeOfString:@"#" options:NSLiteralSearch];
    if (hashMark.location != NSNotFound)
        return [self substringFromIndex:hashMark.location + 1];
    return nil;
}
1200
1201@end
1202