<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[24812] trunk/WebKit</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/24812">24812</a></dd>
<dt>Author</dt> <dd>aliceli1</dd>
<dt>Date</dt> <dd>2007-08-01 18:18:37 -0700 (Wed, 01 Aug 2007)</dd>
</dl>
<h3>Log Message</h3>
<pre> Reviewed by .
Making WebNSURLExtras objc++
* Misc/WebNSURLExtras.m: Removed.
* Misc/WebNSURLExtras.mm: Copied from WebKit/Misc/WebNSURLExtras.m.
* WebKit.xcodeproj/project.pbxproj:</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkWebKitChangeLog">trunk/WebKit/ChangeLog</a></li>
<li><a href="#trunkWebKitWebKitxcodeprojprojectpbxproj">trunk/WebKit/WebKit.xcodeproj/project.pbxproj</a></li>
</ul>
<h3>Added Paths</h3>
<ul>
<li><a href="#trunkWebKitMiscWebNSURLExtrasmm">trunk/WebKit/Misc/WebNSURLExtras.mm</a></li>
</ul>
<h3>Removed Paths</h3>
<ul>
<li><a href="#trunkWebKitMiscWebNSURLExtrasm">trunk/WebKit/Misc/WebNSURLExtras.m</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkWebKitChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/WebKit/ChangeLog (24811 => 24812)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/WebKit/ChangeLog        2007-08-02 01:15:23 UTC (rev 24811)
+++ trunk/WebKit/ChangeLog        2007-08-02 01:18:37 UTC (rev 24812)
</span><span class="lines">@@ -1,3 +1,13 @@
</span><ins>+2007-08-01 Alice Liu &lt;alice.liu@apple.com&gt;
+
+ Reviewed by .
+
+ Making WebNSURLExtras objc++
+
+ * Misc/WebNSURLExtras.m: Removed.
+ * Misc/WebNSURLExtras.mm: Copied from WebKit/Misc/WebNSURLExtras.m.
+ * WebKit.xcodeproj/project.pbxproj:
+
</ins><span class="cx"> 2007-08-01 Darin Adler &lt;darin@apple.com&gt;
</span><span class="cx">
</span><span class="cx"> Reviewed by Anders Carlsson and Kevin Decker.
</span></span></pre></div>
<a id="trunkWebKitMiscWebNSURLExtrasm"></a>
<div class="delfile"><h4>Deleted: trunk/WebKit/Misc/WebNSURLExtras.m (24811 => 24812)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/WebKit/Misc/WebNSURLExtras.m        2007-08-02 01:15:23 UTC (rev 24811)
+++ trunk/WebKit/Misc/WebNSURLExtras.m        2007-08-02 01:18:37 UTC (rev 24812)
</span><span class="lines">@@ -1,1029 +0,0 @@
</span><del>-/*
- * Copyright (C) 2005 Apple Computer, Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
- * its contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#import &lt;WebKit/WebNSURLExtras.h&gt;
-
-#import &lt;JavaScriptCore/Assertions.h&gt;
-#import &lt;WebKit/WebKitNSStringExtras.h&gt;
-#import &lt;WebKit/WebNSDataExtras.h&gt;
-#import &lt;WebKit/WebNSObjectExtras.h&gt;
-#import &lt;WebKit/WebLocalizableStrings.h&gt;
-#import &lt;WebCore/LoaderNSURLExtras.h&gt;
-
-#import &lt;WebKitSystemInterface.h&gt;
-#import "WebSystemInterface.h"
-
-#import &lt;Foundation/NSURLRequest.h&gt;
-
-#import &lt;unicode/uchar.h&gt;
-#import &lt;unicode/uidna.h&gt;
-#import &lt;unicode/uscript.h&gt;
-
-typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);
-
-// Needs to be big enough to hold an IDN-encoded name.
-// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
-#define HOST_NAME_BUFFER_LENGTH 2048
-
-#define URL_BYTES_BUFFER_LENGTH 2048
-
-static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
-static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
-
-static inline BOOL isLookalikeCharacter(int charCode)
-{
-// FIXME: Move this code down into WebCore so it can be shared with other platforms.
-
-// This function treats the following as unsafe, lookalike characters:
-// any non-printable character, any character considered as whitespace that isn't already converted to a space by ICU,
-// and any ignorable character.
-
-// We also considered the characters in Mozilla's blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars),
-// and included all of these characters that ICU can encode.
-
- if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
- return YES;
-
- switch (charCode) {
- case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
- case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
- case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
- case 0x05B4: /* HEBREW POINT HIRIQ */
- case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
- case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
- case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
- case 0x0660: /* ARABIC INDIC DIGIT ZERO */
- case 0x06D4: /* ARABIC FULL STOP */
- case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */
- case 0x2027: /* HYPHENATION POINT */
- case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
- case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
- case 0x2044: /* FRACTION SLASH */
- case 0x2215: /* DIVISION SLASH */
- case 0x23ae: /* INTEGRAL EXTENSION */
- case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
- case 0x29F8: /* BIG SOLIDUS */
- case 0x29f6: /* SOLIDUS WITH OVERBAR */
- case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
- case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
- case 0x3008: /* LEFT ANGLE BRACKET */
- case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
- case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
- case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
- case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
- case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
- case 0x33DF: /* SQUARE A OVER M */
- case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
- case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
- case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
- case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
- case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
- return YES;
- default:
- return NO;
- }
-}
-
-static char hexDigit(int i)
-{
- if (i < 0 || i > 16) {
- LOG_ERROR("illegal hex digit");
- return '0';
- }
- int h = i;
- if (h >= 10) {
- h = h - 10 + 'A';
- }
- else {
- h += '0';
- }
- return h;
-}
-
-static BOOL isHexDigit(char c)
-{
- return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
-}
-
-static int hexDigitValue(char c)
-{
- if (c >= '0' && c <= '9') {
- return c - '0';
- }
- if (c >= 'A' && c <= 'F') {
- return c - 'A' + 10;
- }
- if (c >= 'a' && c <= 'f') {
- return c - 'a' + 10;
- }
- LOG_ERROR("illegal hex digit");
- return 0;
-}
-
-static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
-{
- // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
- // Skip quoted strings so that characters in them don't confuse us.
- // When we find a '?' character, we are past the part of the URL that contains host names.
-
- static NSCharacterSet *hostNameOrStringStartCharacters;
- if (hostNameOrStringStartCharacters == nil) {
- hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
- CFRetain(hostNameOrStringStartCharacters);
- }
- static NSCharacterSet *hostNameEndCharacters;
- if (hostNameEndCharacters == nil) {
- hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
- CFRetain(hostNameEndCharacters);
- }
- static NSCharacterSet *quotedStringCharacters;
- if (quotedStringCharacters == nil) {
- quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
- CFRetain(quotedStringCharacters);
- }
-
- unsigned stringLength = [string length];
- NSRange remaining = NSMakeRange(0, stringLength);
-
- while (1) {
- // Find start of host name or of quoted string.
- NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
- if (hostNameOrStringStart.location == NSNotFound) {
- return;
- }
- unichar c = [string characterAtIndex:hostNameOrStringStart.location];
- remaining.location = NSMaxRange(hostNameOrStringStart);
- remaining.length = stringLength - remaining.location;
-
- if (c == '?') {
- return;
- }
-
- if (c == '@') {
- // Find end of host name.
- unsigned hostNameStart = remaining.location;
- NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
- BOOL done;
- if (hostNameEnd.location == NSNotFound) {
- hostNameEnd.location = stringLength;
- done = YES;
- } else {
- remaining.location = hostNameEnd.location;
- remaining.length = stringLength - remaining.location;
- done = NO;
- }
-
- // Process host name range.
- f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
-
- if (done) {
- return;
- }
- } else {
- // Skip quoted string.
- ASSERT(c == '"');
- while (1) {
- NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
- if (escapedCharacterOrStringEnd.location == NSNotFound) {
- return;
- }
- c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
- remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
- remaining.length = stringLength - remaining.location;
-
- // If we are the end of the string, then break from the string loop back to the host name loop.
- if (c == '"') {
- break;
- }
-
- // Skip escaped character.
- ASSERT(c == '\\');
- if (remaining.length == 0) {
- return;
- }
- remaining.location += 1;
- remaining.length -= 1;
- }
- }
- }
-}
-
-static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
-{
- // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
- // but we have to do it before doing all the %-escaping, and this is the only
- // code we have that parses mailto URLs anyway.
-
- // Maybe we should implement this using a character buffer instead?
-
- if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
- applyHostNameFunctionToMailToURLString(string, f, context);
- return;
- }
-
- // Find the host name in a hierarchical URL.
- // It comes after a "://" sequence, with scheme characters preceding.
- // If ends with the end of the string or a ":", "/", or a "?".
- // If there is a "@" character, the host part is just the part after the "@".
- NSRange separatorRange = [string rangeOfString:@"://"];
- if (separatorRange.location == NSNotFound) {
- return;
- }
-
- // Check that all characters before the :// are valid scheme characters.
- static NSCharacterSet *nonSchemeCharacters;
- if (nonSchemeCharacters == nil) {
- nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
- CFRetain(nonSchemeCharacters);
- }
- if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
- return;
- }
-
- unsigned stringLength = [string length];
-
- static NSCharacterSet *hostTerminators;
- if (hostTerminators == nil) {
- hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
- CFRetain(hostTerminators);
- }
-
- // Start after the separator.
- unsigned authorityStart = NSMaxRange(separatorRange);
-
- // Find terminating character.
- NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
- unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;
-
- // Find "@" for the start of the host name.
- NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
- unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
-
- f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
-}
-
-@implementation NSURL (WebNSURLExtras)
-
-static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
-{
- BOOL needsMapping = encode
- ? [string _web_hostNameNeedsEncodingWithRange:range]
- : [string _web_hostNameNeedsDecodingWithRange:range];
- if (!needsMapping) {
- return;
- }
-
- NSMutableArray **array = (NSMutableArray **)context;
- if (*array == nil) {
- *array = [[NSMutableArray alloc] init];
- }
-
- [*array addObject:[NSValue valueWithRange:range]];
-}
-
-static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
-{
- return collectRangesThatNeedMapping(string, range, context, YES);
-}
-
-static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
-{
- return collectRangesThatNeedMapping(string, range, context, NO);
-}
-
-static NSString *mapHostNames(NSString *string, BOOL encode)
-{
- // Generally, we want to optimize for the case where there is one host name that does not need mapping.
-
- if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
- return string;
-
- // Make a list of ranges that actually need mapping.
- NSMutableArray *hostNameRanges = nil;
- StringRangeApplierFunction f = encode
- ? collectRangesThatNeedEncoding
- : collectRangesThatNeedDecoding;
- applyHostNameFunctionToURLString(string, f, &hostNameRanges);
- if (hostNameRanges == nil)
- return string;
-
- // Do the mapping.
- NSMutableString *mutableCopy = [string mutableCopy];
- unsigned i = [hostNameRanges count];
- while (i-- != 0) {
- NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue];
- NSString *mappedHostName = encode
- ? [string _web_encodeHostNameWithRange:hostNameRange]
- : [string _web_decodeHostNameWithRange:hostNameRange];
- [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName];
- }
- [hostNameRanges release];
- return [mutableCopy autorelease];
-}
-
-+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
-{
- if (string == nil) {
- return nil;
- }
- string = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);
-
- NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
- ASSERT(userTypedData);
-
- const UInt8 *inBytes = [userTypedData bytes];
- int inLength = [userTypedData length];
- if (inLength == 0) {
- return [NSURL URLWithString:@""];
- }
-
- char *outBytes = malloc(inLength * 3); // large enough to %-escape every character
- char *p = outBytes;
- int outLength = 0;
- int i;
- for (i = 0; i < inLength; i++) {
- UInt8 c = inBytes[i];
- if (c <= 0x20 || c >= 0x7f) {
- *p++ = '%';
- *p++ = hexDigit(c >> 4);
- *p++ = hexDigit(c & 0xf);
- outLength += 3;
- }
- else {
- *p++ = c;
- outLength++;
- }
- }
-
- NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
- return [self _web_URLWithData:data relativeToURL:URL];
-}
-
-+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
-{
- return [self _web_URLWithUserTypedString:string relativeToURL:nil];
-}
-
-+ (NSURL *)_web_URLWithDataAsString:(NSString *)string
-{
- if (string == nil) {
- return nil;
- }
- return [self _web_URLWithDataAsString:string relativeToURL:nil];
-}
-
-+ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
-{
- if (string == nil) {
- return nil;
- }
- string = [string _webkit_stringByTrimmingWhitespace];
- NSData *data = [string dataUsingEncoding:NSISOLatin1StringEncoding];
- return [self _web_URLWithData:data relativeToURL:baseURL];
-}
-
-+ (NSURL *)_web_URLWithData:(NSData *)data
-{
- return urlWithData(data);
-}
-
-+ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
-{
- return urlWithDataRelativeToURL(data, baseURL);
-}
-
-- (NSData *)_web_originalData
-{
- return urlOriginalData(self);
-}
-
-- (NSString *)_web_originalDataAsString
-{
- return urlOriginalDataAsString(self);
-}
-
-- (NSString *)_web_userVisibleString
-{
- NSData *data = [self _web_originalData];
- const unsigned char *before = [data bytes];
- int length = [data length];
-
- bool needsHostNameDecoding = false;
-
- const unsigned char *p = before;
- int bufferLength = (length * 3) + 1;
- char *after = malloc(bufferLength); // large enough to %-escape every character
- char *q = after;
- int i;
- for (i = 0; i < length; i++) {
- unsigned char c = p[i];
- // escape control characters, space, and delete
- if (c <= 0x20 || c == 0x7f) {
- *q++ = '%';
- *q++ = hexDigit(c >> 4);
- *q++ = hexDigit(c & 0xf);
- }
- // unescape escape sequences that indicate bytes greater than 0x7f
- else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
- unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
- if (u > 0x7f) {
- // unescape
- *q++ = u;
- }
- else {
- // do not unescape
- *q++ = p[i];
- *q++ = p[i + 1];
- *q++ = p[i + 2];
- }
- i += 2;
- }
- else {
- *q++ = c;
-
- // Check for "xn--" in an efficient, non-case-sensitive, way.
- if (c == '-' && i >= 3 && !needsHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
- needsHostNameDecoding = true;
- }
- }
- *q = '\0';
-
- // Check string to see if it can be converted to display using UTF-8
- NSString *result = [NSString stringWithUTF8String:after];
- if (!result) {
- // Could not convert to UTF-8.
- // Convert characters greater than 0x7f to escape sequences.
- // Shift current string to the end of the buffer
- // then we will copy back bytes to the start of the buffer
- // as we convert.
- int afterlength = q - after;
- char *p = after + bufferLength - afterlength - 1;
- memmove(p, after, afterlength + 1); // copies trailing '\0'
- char *q = after;
- while (*p) {
- unsigned char c = *p;
- if (c > 0x7f) {
- *q++ = '%';
- *q++ = hexDigit(c >> 4);
- *q++ = hexDigit(c & 0xf);
- }
- else {
- *q++ = *p;
- }
- p++;
- }
- *q = '\0';
- result = [NSString stringWithUTF8String:after];
- }
-
- free(after);
-
- // As an optimization, only do host name decoding if we have "xn--" somewhere.
- return needsHostNameDecoding ? mapHostNames(result, NO) : result;
-}
-
-- (BOOL)_web_isEmpty
-{
- return urlIsEmpty(self);
-}
-
-- (const char *)_web_URLCString
-{
- NSMutableData *data = [NSMutableData data];
- [data appendData:[self _web_originalData]];
- [data appendBytes:"\0" length:1];
- return (const char *)[data bytes];
- }
-
-- (NSURL *)_webkit_canonicalize
-{
- InitWebCoreSystemInterface();
- return canonicalURL(self);
-}
-
-typedef struct {
- NSString *scheme;
- NSString *user;
- NSString *password;
- NSString *host;
- CFIndex port; // kCFNotFound means ignore/omit
- NSString *path;
- NSString *query;
- NSString *fragment;
-} WebKitURLComponents;
-
-
-
-- (NSURL *)_webkit_URLByRemovingComponent:(CFURLComponentType)component
-{
- return urlByRemovingComponent(self, component);
-}
-
-- (NSURL *)_webkit_URLByRemovingFragment
-{
- return urlByRemovingFragment(self);
-}
-
-- (NSURL *)_webkit_URLByRemovingResourceSpecifier
-{
- return urlByRemovingResourceSpecifier(self);
-}
-
-- (BOOL)_webkit_isJavaScriptURL
-{
- return [[self _web_originalDataAsString] _webkit_isJavaScriptURL];
-}
-
-- (NSString *)_webkit_scriptIfJavaScriptURL
-{
- return [[self absoluteString] _webkit_scriptIfJavaScriptURL];
-}
-
-- (BOOL)_webkit_isFileURL
-{
- return urlIsFileURL(self);
-}
-
-- (BOOL)_webkit_isFTPDirectoryURL
-{
- return [[self _web_originalDataAsString] _webkit_isFTPDirectoryURL];
-}
-
-- (BOOL)_webkit_shouldLoadAsEmptyDocument
-{
- return [[self _web_originalDataAsString] _webkit_hasCaseInsensitivePrefix:@"about:"] || [self _web_isEmpty];
-}
-
-- (NSURL *)_web_URLWithLowercasedScheme
-{
- CFRange range;
- CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &range);
- if (range.location == kCFNotFound) {
- return self;
- }
-
- UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
- UInt8 *buffer = static_buffer;
- CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
- if (bytesFilled == -1) {
- CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
- buffer = malloc(bytesToAllocate);
- bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
- ASSERT(bytesFilled == bytesToAllocate);
- }
-
- int i;
- BOOL changed = NO;
- for (i = 0; i < range.length; ++i) {
- UInt8 c = buffer[range.location + i];
- UInt8 lower = tolower(c);
- if (c != lower) {
- buffer[range.location + i] = lower;
- changed = YES;
- }
- }
-
- NSURL *result = changed
- ? (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
- : (NSURL *)self;
-
- if (buffer != static_buffer) {
- free(buffer);
- }
-
- return result;
-}
-
-
--(BOOL)_web_hasQuestionMarkOnlyQueryString
-{
- CFRange rangeWithSeparators;
- CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &rangeWithSeparators);
- if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) {
- return YES;
- }
- return NO;
-}
-
--(NSData *)_web_schemeSeparatorWithoutColon
-{
- NSData *result = nil;
- CFRange rangeWithSeparators;
- CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);
- if (rangeWithSeparators.location != kCFNotFound) {
- NSString *absoluteString = [self absoluteString];
- NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, rangeWithSeparators.length - range.length - 1);
- if (separatorsRange.location + separatorsRange.length <= [absoluteString length]) {
- NSString *slashes = [absoluteString substringWithRange:separatorsRange];
- result = [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
- }
- }
- return result;
-}
-
-#define completeURL (CFURLComponentType)-1
-
--(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
-{
- static int URLComponentTypeBufferLength = 2048;
-
- UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
- UInt8 *allBytesBuffer = staticAllBytesBuffer;
-
- CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
- if (bytesFilled == -1) {
- CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
- allBytesBuffer = malloc(bytesToAllocate);
- bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
- }
-
- CFRange range;
- if (componentType != completeURL) {
- range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
- if (range.location == kCFNotFound) {
- return nil;
- }
- }
- else {
- range.location = 0;
- range.length = bytesFilled;
- }
-
- NSData *componentData = [NSData dataWithBytes:allBytesBuffer + range.location length:range.length];
-
- const unsigned char *bytes = [componentData bytes];
- NSMutableData *resultData = [NSMutableData data];
- // NOTE: add leading '?' to query strings non-zero length query strings.
- // NOTE: retain question-mark only query strings.
- if (componentType == kCFURLComponentQuery) {
- if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
- [resultData appendBytes:"?" length:1];
- }
- }
- int i;
- for (i = 0; i < range.length; i++) {
- unsigned char c = bytes[i];
- if (c <= 0x20 || c >= 0x7f) {
- char escaped[3];
- escaped[0] = '%';
- escaped[1] = hexDigit(c >> 4);
- escaped[2] = hexDigit(c & 0xf);
- [resultData appendBytes:escaped length:3];
- }
- else {
- char b[1];
- b[0] = c;
- [resultData appendBytes:b length:1];
- }
- }
-
- if (staticAllBytesBuffer != allBytesBuffer) {
- free(allBytesBuffer);
- }
-
- return resultData;
-}
-
--(NSData *)_web_schemeData
-{
- return [self _web_dataForURLComponentType:kCFURLComponentScheme];
-}
-
--(NSData *)_web_hostData
-{
- NSData *result = [self _web_dataForURLComponentType:kCFURLComponentHost];
- NSData *scheme = [self _web_schemeData];
- // Take off localhost for file
- if ([scheme _web_isCaseInsensitiveEqualToCString:"file"]) {
- return ([result _web_isCaseInsensitiveEqualToCString:"localhost"]) ? nil : result;
- }
- return result;
-}
-
-- (NSString *)_web_hostString
-{
- NSData *data = [self _web_hostData];
- if (!data) {
- data = [NSData data];
- }
- return [[[NSString alloc] initWithData:[self _web_hostData] encoding:NSUTF8StringEncoding] autorelease];
-}
-
-- (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
-{
- return suggestedFilenameWithMIMEType(self, MIMEType);
-}
-
-@end
-
-@implementation NSString (WebNSURLExtras)
-
-- (BOOL)_web_isUserVisibleURL
-{
- BOOL valid = YES;
- // get buffer
-
- char static_buffer[1024];
- const char *p;
- BOOL success = CFStringGetCString((CFStringRef)self, static_buffer, 1023, kCFStringEncodingUTF8);
- if (success) {
- p = static_buffer;
- } else {
- p = [self UTF8String];
- }
-
- int length = strlen(p);
-
- // check for characters <= 0x20 or >=0x7f, %-escape sequences of %7f, and xn--, these
- // are the things that will lead _web_userVisibleString to actually change things.
- int i;
- for (i = 0; i < length; i++) {
- unsigned char c = p[i];
- // escape control characters, space, and delete
- if (c <= 0x20 || c == 0x7f) {
- valid = NO;
- break;
- } else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
- unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
- if (u > 0x7f) {
- valid = NO;
- break;
- }
- i += 2;
- } else {
- // Check for "xn--" in an efficient, non-case-sensitive, way.
- if (c == '-' && i >= 3 && (p[i - 3] | 0x20) == 'x' && (p[i - 2] | 0x20) == 'n' && p[i - 1] == '-') {
- valid = NO;
- break;
- }
- }
- }
-
- return valid;
-}
-
-
-- (BOOL)_webkit_isJavaScriptURL
-{
- return [self _webkit_hasCaseInsensitivePrefix:@"javascript:"];
-}
-
-- (BOOL)_webkit_isFileURL
-{
- return stringIsFileURL(self);
-}
-
-- (NSString *)_webkit_stringByReplacingValidPercentEscapes
-{
- NSString *s = [self stringByReplacingPercentEscapesUsingEncoding:NSUTF8StringEncoding];
- return s ? s : self;
-}
-
-- (NSString *)_webkit_scriptIfJavaScriptURL
-{
- if (![self _webkit_isJavaScriptURL]) {
- return nil;
- }
- return [[self substringFromIndex:11] _webkit_stringByReplacingValidPercentEscapes];
-}
-
-- (BOOL)_webkit_isFTPDirectoryURL
-{
- int length = [self length];
- if (length < 5) { // 5 is length of "ftp:/"
- return NO;
- }
- unichar lastChar = [self characterAtIndex:length - 1];
- return lastChar == '/' && [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
-}
-
-
-static BOOL readIDNScriptWhiteListFile(NSString *filename)
-{
- if (!filename) {
- return NO;
- }
- FILE *file = fopen([filename fileSystemRepresentation], "r");
- if (file == NULL) {
- return NO;
- }
-
- // Read a word at a time.
- // Allow comments, starting with # character to the end of the line.
- while (1) {
- // Skip a comment if present.
- int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
- if (result == EOF) {
- break;
- }
-
- // Read a script name if present.
- char word[33];
- result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
- if (result == EOF) {
- break;
- }
- if (result == 1) {
- // Got a word, map to script code and put it into the array.
- int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
- if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
- size_t index = script / 32;
- uint32_t mask = 1 << (script % 32);
- IDNScriptWhiteList[index] |= mask;
- }
- }
- }
- fclose(file);
- return YES;
-}
-
-static void readIDNScriptWhiteList(void)
-{
- // Read white list from library.
- NSArray *dirs = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
- int i, numDirs = [dirs count];
- for (i = 0; i < numDirs; i++) {
- NSString *dir = [dirs objectAtIndex:i];
- if (readIDNScriptWhiteListFile([dir stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) {
- return;
- }
- }
-
- // Fall back on white list inside bundle.
- NSBundle *bundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
- readIDNScriptWhiteListFile([bundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"]);
-}
-
-static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
-{
- pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);
-
- int32_t i = 0;
- while (i < length) {
- UChar32 c;
- U16_NEXT(buffer, i, length, c)
- UErrorCode error = U_ZERO_ERROR;
- UScriptCode script = uscript_getScript(c, &error);
- if (error != U_ZERO_ERROR) {
- LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
- return NO;
- }
- if (script < 0) {
- LOG_ERROR("got negative number for script code from ICU: %d", script);
- return NO;
- }
- if (script >= USCRIPT_CODE_LIMIT) {
- return NO;
- }
- size_t index = script / 32;
- uint32_t mask = 1 << (script % 32);
- if (!(IDNScriptWhiteList[index] & mask)) {
- return NO;
- }
-
- if (isLookalikeCharacter(c))
- return NO;
- }
- return YES;
-}
-
-// Return value of nil means no mapping is necessary.
-// If makeString is NO, then return value is either nil or self to indicate mapping is necessary.
-// If makeString is YES, then return value is either nil or the mapped string.
-// encode selects the direction: YES converts with uidna_IDNToASCII, NO with uidna_IDNToUnicode.
-// nil is also returned when the range is longer than HOST_NAME_BUFFER_LENGTH, when the receiver is empty,
-// when ICU reports any status other than U_ZERO_ERROR, and (decoding only) when the converted name
-// fails allCharactersInIDNScriptWhiteList().
-- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
-{
- // Host names that do not fit in the fixed-size buffers below are left unmapped.
- if (range.length > HOST_NAME_BUFFER_LENGTH) {
- return nil;
- }
-
- if ([self length] == 0)
- return nil;
-
- UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
- UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];
-
- // When encoding, undo any percent-escapes inside the range first and work on that substring instead.
- NSString *string = self;
- if (encode && [self rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) {
- NSString *substring = [self substringWithRange:range];
- substring = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)substring, CFSTR("")));
- if (substring != nil) {
- string = substring;
- range = NSMakeRange(0, [string length]);
- }
- }
-
- int length = range.length;
- [string getCharacters:sourceBuffer range:range];
-
- UErrorCode error = U_ZERO_ERROR;
- int32_t numCharactersConverted = (encode ? uidna_IDNToASCII : uidna_IDNToUnicode)
- (sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &error);
- // Any non-zero status, ICU warnings included, is treated as "do not map".
- if (error != U_ZERO_ERROR) {
- return nil;
- }
- // Output identical to the input: nothing to map.
- if (numCharactersConverted == length && memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar)) == 0) {
- return nil;
- }
- // Only show a decoded (Unicode) host name when all of its characters pass the script white list.
- if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted)) {
- return nil;
- }
- return makeString ? (NSString *)[NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : (NSString *)self;
-}
-
-// YES when decoding the host name in range would produce a different string.
-- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
-{
-    NSString *mappingMarker = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
-    return mappingMarker != nil;
-}
-
-// YES when encoding the host name in range would produce a different string.
-- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
-{
-    NSString *mappingMarker = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
-    return mappingMarker != nil;
-}
-
-// Decoded form of the host name in range, or nil when no mapping is necessary.
-- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
-{
-    NSString *decoded = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
-    return decoded;
-}
-
-// Encoded form of the host name in range, or nil when no mapping is necessary.
-- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
-{
-    NSString *encoded = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
-    return encoded;
-}
-
-// Treats the whole receiver as a host name and decodes it; returns the receiver when no mapping applies.
-- (NSString *)_web_decodeHostName
-{
-    NSRange wholeString = NSMakeRange(0, [self length]);
-    NSString *decoded = [self _web_mapHostNameWithRange:wholeString encode:NO makeString:YES];
-    if (decoded != nil)
-        return decoded;
-    return self;
-}
-
-// Treats the whole receiver as a host name and encodes it; returns the receiver when no mapping applies.
-- (NSString *)_web_encodeHostName
-{
-    NSRange wholeString = NSMakeRange(0, [self length]);
-    NSString *encoded = [self _web_mapHostNameWithRange:wholeString encode:YES makeString:YES];
-    if (encoded != nil)
-        return encoded;
-    return self;
-}
-
-// Returns the range of the URL scheme (everything before the first ":") when that prefix is non-empty
-// and made up solely of letters, digits, "+", "." and "-"; otherwise returns {NSNotFound, 0}.
--(NSRange)_webkit_rangeOfURLScheme
-{
-    // Built lazily and kept for the life of the process: constructing the inverted set is expensive.
-    static NSCharacterSet *nonSchemeCharacterSet = nil;
-
-    NSRange firstColon = [self rangeOfString:@":"];
-    if (firstColon.location == NSNotFound || firstColon.location == 0)
-        return NSMakeRange(NSNotFound, 0);
-
-    if (nonSchemeCharacterSet == nil) {
-        NSCharacterSet *schemeCharacterSet = [NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"];
-        nonSchemeCharacterSet = [[schemeCharacterSet invertedSet] retain];
-    }
-
-    NSRange candidate = NSMakeRange(0, firstColon.location);
-    NSRange firstIllegal = [self rangeOfCharacterFromSet:nonSchemeCharacterSet options:0 range:candidate];
-    if (firstIllegal.location != NSNotFound)
-        return NSMakeRange(NSNotFound, 0);
-    return candidate;
-}
-
-// YES when the receiver, ignoring surrounding whitespace, starts with a valid URL scheme.
--(BOOL)_webkit_looksLikeAbsoluteURL
-{
-    // Whitespace is trimmed first because _web_URLWithString allows it.
-    NSString *trimmed = [self _webkit_stringByTrimmingWhitespace];
-    NSRange schemeRange = [trimmed _webkit_rangeOfURLScheme];
-    return schemeRange.location != NSNotFound;
-}
-
-// Returns everything after the first "#" in the receiver, or nil when there is no "#".
-- (NSString *)_webkit_URLFragment
-{
-    NSRange hashMark = [self rangeOfString:@"#" options:NSLiteralSearch];
-    return hashMark.location == NSNotFound ? nil : [self substringFromIndex:hashMark.location + 1];
-}
-
-@end
</del></span></pre></div>
<a id="trunkWebKitMiscWebNSURLExtrasmmfromrev24810trunkWebKitMiscWebNSURLExtrasm"></a>
<div class="copfile"><h4>Copied: trunk/WebKit/Misc/WebNSURLExtras.mm (from rev 24810, trunk/WebKit/Misc/WebNSURLExtras.m) (0 => 24812)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/WebKit/Misc/WebNSURLExtras.mm         (rev 0)
+++ trunk/WebKit/Misc/WebNSURLExtras.mm        2007-08-02 01:18:37 UTC (rev 24812)
</span><span class="lines">@@ -0,0 +1,1029 @@
</span><ins>+/*
+ * Copyright (C) 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import <WebKit/WebNSURLExtras.h>
+
+#import <JavaScriptCore/Assertions.h>
+#import <WebKit/WebKitNSStringExtras.h>
+#import <WebKit/WebNSDataExtras.h>
+#import <WebKit/WebNSObjectExtras.h>
+#import <WebKit/WebLocalizableStrings.h>
+#import <WebCore/LoaderNSURLExtras.h>
+
+#import <WebKitSystemInterface.h>
+#import "WebSystemInterface.h"
+
+#import <Foundation/NSURLRequest.h>
+
+#import <unicode/uchar.h>
+#import <unicode/uidna.h>
+#import <unicode/uscript.h>
+
+typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);
+
+// Needs to be big enough to hold an IDN-encoded name.
+// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
+#define HOST_NAME_BUFFER_LENGTH 2048
+
+#define URL_BYTES_BUFFER_LENGTH 2048
+
+static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
+static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
+
+// Returns YES if charCode should be treated as an unsafe, lookalike character, NO otherwise.
+// charCode: the code point to test; it is handed to ICU's u_isprint, u_isUWhiteSpace and
+// u_hasBinaryProperty and then compared against the explicit list below.
+static inline BOOL isLookalikeCharacter(int charCode)
+{
+// FIXME: Move this code down into WebCore so it can be shared with other platforms.
+
+// This function treats the following as unsafe, lookalike characters:
+// any non-printable character, any character considered as whitespace that isn't already converted to a space by ICU,
+// and any ignorable character.
+
+// We also considered the characters in Mozilla's blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars),
+// and included all of these characters that ICU can encode.
+
+ if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
+ return YES;
+
+ // Individually listed code points; every case falls through to the single "return YES".
+ switch (charCode) {
+ case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
+ case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
+ case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
+ case 0x05B4: /* HEBREW POINT HIRIQ */
+ case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
+ case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
+ case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
+ case 0x0660: /* ARABIC INDIC DIGIT ZERO */
+ case 0x06D4: /* ARABIC FULL STOP */
+ case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */
+ case 0x2027: /* HYPHENATION POINT */
+ case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
+ case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
+ case 0x2044: /* FRACTION SLASH */
+ case 0x2215: /* DIVISION SLASH */
+ case 0x23ae: /* INTEGRAL EXTENSION */
+ case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
+ case 0x29F8: /* BIG SOLIDUS */
+ case 0x29f6: /* SOLIDUS WITH OVERBAR */
+ case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
+ case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
+ case 0x3008: /* LEFT ANGLE BRACKET */
+ case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
+ case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
+ case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
+ case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
+ case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
+ case 0x33DF: /* SQUARE A OVER M */
+ case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
+ case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
+ case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
+ case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
+ case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
+ return YES;
+ default:
+ return NO;
+ }
+}
+
+// Maps a nibble value (0-15) to its uppercase ASCII hexadecimal digit ('0'-'9', 'A'-'F').
+// Any value outside that range is logged with LOG_ERROR and mapped to '0'.
+static char hexDigit(int i)
+{
+    // The largest valid nibble is 15; 16 must be rejected too, otherwise it would map to 'G'.
+    if (i < 0 || i > 15) {
+        LOG_ERROR("illegal hex digit");
+        return '0';
+    }
+    if (i >= 10)
+        return 'A' + (i - 10);
+    return '0' + i;
+}
+
+// YES when c is an ASCII hexadecimal digit in either case.
+static BOOL isHexDigit(char c)
+{
+    if (c >= '0' && c <= '9')
+        return YES;
+    if (c >= 'A' && c <= 'F')
+        return YES;
+    return c >= 'a' && c <= 'f';
+}
+
+// Numeric value (0-15) of the ASCII hexadecimal digit c, in either case.
+// A character that is not a hex digit is logged with LOG_ERROR and yields 0.
+static int hexDigitValue(char c)
+{
+    int value = 0;
+    if (c >= '0' && c <= '9')
+        value = c - '0';
+    else if (c >= 'A' && c <= 'F')
+        value = c - 'A' + 10;
+    else if (c >= 'a' && c <= 'f')
+        value = c - 'a' + 10;
+    else
+        LOG_ERROR("illegal hex digit");
+    return value;
+}
+
+// Scans a mailto: URL string and calls f(string, range, context) once for each host-name range found.
+// string: the URL text to scan. f: callback invoked per host name. context: passed through to f untouched.
+static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
+{
+ // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
+ // Skip quoted strings so that characters in them don't confuse us.
+ // When we find a '?' character, we are past the part of the URL that contains host names.
+
+ // The character sets are created on first use and retained so they stay valid across calls.
+ static NSCharacterSet *hostNameOrStringStartCharacters;
+ if (hostNameOrStringStartCharacters == nil) {
+ hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
+ CFRetain(hostNameOrStringStartCharacters);
+ }
+ static NSCharacterSet *hostNameEndCharacters;
+ if (hostNameEndCharacters == nil) {
+ hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
+ CFRetain(hostNameEndCharacters);
+ }
+ static NSCharacterSet *quotedStringCharacters;
+ if (quotedStringCharacters == nil) {
+ quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
+ CFRetain(quotedStringCharacters);
+ }
+
+ // "remaining" is the not-yet-scanned tail of the string; it shrinks as the scan advances.
+ unsigned stringLength = [string length];
+ NSRange remaining = NSMakeRange(0, stringLength);
+
+ while (1) {
+ // Find start of host name or of quoted string.
+ NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
+ if (hostNameOrStringStart.location == NSNotFound) {
+ return;
+ }
+ unichar c = [string characterAtIndex:hostNameOrStringStart.location];
+ remaining.location = NSMaxRange(hostNameOrStringStart);
+ remaining.length = stringLength - remaining.location;
+
+ if (c == '?') {
+ return;
+ }
+
+ if (c == '@') {
+ // Find end of host name.
+ unsigned hostNameStart = remaining.location;
+ NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
+ BOOL done;
+ if (hostNameEnd.location == NSNotFound) {
+ // No terminator: the host name runs to the end of the string and is the last one.
+ hostNameEnd.location = stringLength;
+ done = YES;
+ } else {
+ // Resume scanning at the terminator itself, so a '?' there still ends the outer loop.
+ remaining.location = hostNameEnd.location;
+ remaining.length = stringLength - remaining.location;
+ done = NO;
+ }
+
+ // Process host name range.
+ f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
+
+ if (done) {
+ return;
+ }
+ } else {
+ // Skip quoted string.
+ ASSERT(c == '"');
+ while (1) {
+ NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
+ if (escapedCharacterOrStringEnd.location == NSNotFound) {
+ return;
+ }
+ c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
+ remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
+ remaining.length = stringLength - remaining.location;
+
+ // If we are at the end of the quoted string, then break from the string loop back to the host name loop.
+ if (c == '"') {
+ break;
+ }
+
+ // Skip escaped character.
+ ASSERT(c == '\\');
+ if (remaining.length == 0) {
+ return;
+ }
+ remaining.location += 1;
+ remaining.length -= 1;
+ }
+ }
+ }
+}
+
+// Locates the host name(s) in a URL string and calls f(string, range, context) for each one.
+// mailto: URLs are delegated to applyHostNameFunctionToMailToURLString(); for other URLs at most one
+// range is reported. Nothing is reported when there is no "://" or the text before it is not a valid scheme.
+static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
+{
+ // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
+ // but we have to do it before doing all the %-escaping, and this is the only
+ // code we have that parses mailto URLs anyway.
+
+ // Maybe we should implement this using a character buffer instead?
+
+ if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
+ applyHostNameFunctionToMailToURLString(string, f, context);
+ return;
+ }
+
+ // Find the host name in a hierarchical URL.
+ // It comes after a "://" sequence, with scheme characters preceding.
+ // It ends with the end of the string or a ":", "/", "?", or "#".
+ // If there is a "@" character, the host part is just the part after the "@".
+ NSRange separatorRange = [string rangeOfString:@"://"];
+ if (separatorRange.location == NSNotFound) {
+ return;
+ }
+
+ // Check that all characters before the :// are valid scheme characters.
+ // The set is created on first use and retained so it stays valid across calls.
+ static NSCharacterSet *nonSchemeCharacters;
+ if (nonSchemeCharacters == nil) {
+ nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
+ CFRetain(nonSchemeCharacters);
+ }
+ if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
+ return;
+ }
+
+ unsigned stringLength = [string length];
+
+ static NSCharacterSet *hostTerminators;
+ if (hostTerminators == nil) {
+ hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
+ CFRetain(hostTerminators);
+ }
+
+ // Start after the separator.
+ unsigned authorityStart = NSMaxRange(separatorRange);
+
+ // Find terminating character.
+ NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
+ unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;
+
+ // Find "@" for the start of the host name.
+ // The search stops at hostNameEnd, so an "@" after the first terminator is not considered.
+ NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
+ unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
+
+ f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
+}
+
+@implementation NSURL (WebNSURLExtras)
+
+// Appends range (boxed in an NSValue) to the array that context points at when the host name in that
+// range needs encoding (encode == YES) or decoding (encode == NO). context is an NSMutableArray **;
+// the array is allocated on first use and is owned by the caller.
+static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
+{
+    BOOL mappingRequired;
+    if (encode)
+        mappingRequired = [string _web_hostNameNeedsEncodingWithRange:range];
+    else
+        mappingRequired = [string _web_hostNameNeedsDecodingWithRange:range];
+
+    if (mappingRequired) {
+        NSMutableArray **collected = (NSMutableArray **)context;
+        if (!*collected)
+            *collected = [[NSMutableArray alloc] init];
+        [*collected addObject:[NSValue valueWithRange:range]];
+    }
+}
+
+// StringRangeApplierFunction adapter: collects ranges whose host names need encoding.
+static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
+{
+    collectRangesThatNeedMapping(string, range, context, YES);
+}
+
+// StringRangeApplierFunction adapter: collects ranges whose host names need decoding.
+static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
+{
+    collectRangesThatNeedMapping(string, range, context, NO);
+}
+
+// Returns string with every host name that needs it encoded (encode == YES) or decoded (encode == NO).
+// The input string itself is returned when nothing has to change.
+static NSString *mapHostNames(NSString *string, BOOL encode)
+{
+    // The common case is a single host name that needs no mapping, so bail out as early as possible.
+    if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
+        return string;
+
+    // Gather only the ranges that really need mapping; the array stays nil if there are none.
+    NSMutableArray *rangesToMap = nil;
+    applyHostNameFunctionToURLString(string, encode ? collectRangesThatNeedEncoding : collectRangesThatNeedDecoding, &rangesToMap);
+    if (rangesToMap == nil)
+        return string;
+
+    // Replace from the last range to the first so earlier ranges keep their offsets.
+    NSMutableString *mapped = [string mutableCopy];
+    unsigned remaining;
+    for (remaining = [rangesToMap count]; remaining > 0; remaining--) {
+        NSRange hostRange = [[rangesToMap objectAtIndex:remaining - 1] rangeValue];
+        NSString *replacement;
+        if (encode)
+            replacement = [string _web_encodeHostNameWithRange:hostRange];
+        else
+            replacement = [string _web_decodeHostNameWithRange:hostRange];
+        [mapped replaceCharactersInRange:hostRange withString:replacement];
+    }
+    [rangesToMap release];
+    return [mapped autorelease];
+}
+
+// Builds a URL from text the user typed: trims whitespace, IDN-encodes host names, converts to UTF-8
+// and %-escapes every byte <= 0x20 or >= 0x7f before handing the bytes to _web_URLWithData:relativeToURL:.
+// Returns nil when string is nil or the escape buffer cannot be allocated; an empty string yields
+// [NSURL URLWithString:@""].
++ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
+{
+    if (string == nil) {
+        return nil;
+    }
+    string = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);
+
+    NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
+    ASSERT(userTypedData);
+
+    // Explicit casts: this file is Objective-C++, where void * does not convert implicitly.
+    const UInt8 *inBytes = (const UInt8 *)[userTypedData bytes];
+    int inLength = [userTypedData length];
+    if (inLength == 0) {
+        return [NSURL URLWithString:@""];
+    }
+
+    char *outBytes = (char *)malloc(inLength * 3); // large enough to %-escape every character
+    if (outBytes == NULL) {
+        // Out of memory: fail like the nil-input case instead of writing through a null pointer.
+        return nil;
+    }
+    char *p = outBytes;
+    int outLength = 0;
+    int i;
+    for (i = 0; i < inLength; i++) {
+        UInt8 c = inBytes[i];
+        if (c <= 0x20 || c >= 0x7f) {
+            *p++ = '%';
+            *p++ = hexDigit(c >> 4);
+            *p++ = hexDigit(c & 0xf);
+            outLength += 3;
+        } else {
+            *p++ = c;
+            outLength++;
+        }
+    }
+
+    NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
+    return [self _web_URLWithData:data relativeToURL:URL];
+}
+
+// Convenience form of _web_URLWithUserTypedString:relativeToURL: with no base URL.
++ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
+{
+    NSURL *noBaseURL = nil;
+    return [self _web_URLWithUserTypedString:string relativeToURL:noBaseURL];
+}
+
+// Convenience form of _web_URLWithDataAsString:relativeToURL: with no base URL; nil in, nil out.
++ (NSURL *)_web_URLWithDataAsString:(NSString *)string
+{
+    return string != nil ? [self _web_URLWithDataAsString:string relativeToURL:nil] : nil;
+}
+
+// Trims whitespace from string, converts it to ISO Latin-1 bytes and builds a URL from those bytes
+// relative to baseURL. Returns nil when string is nil.
++ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
+{
+    if (!string)
+        return nil;
+    NSString *trimmed = [string _webkit_stringByTrimmingWhitespace];
+    NSData *latin1Bytes = [trimmed dataUsingEncoding:NSISOLatin1StringEncoding];
+    return [self _web_URLWithData:latin1Bytes relativeToURL:baseURL];
+}
+
+// Forwards to urlWithData().
++ (NSURL *)_web_URLWithData:(NSData *)data
+{
+    NSURL *url = urlWithData(data);
+    return url;
+}
+
+// Forwards to urlWithDataRelativeToURL().
++ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
+{
+    NSURL *url = urlWithDataRelativeToURL(data, baseURL);
+    return url;
+}
+
+// Forwards to urlOriginalData() for the receiver.
+- (NSData *)_web_originalData
+{
+    NSData *originalBytes = urlOriginalData(self);
+    return originalBytes;
+}
+
+// Forwards to urlOriginalDataAsString() for the receiver.
+- (NSString *)_web_originalDataAsString
+{
+    NSString *originalText = urlOriginalDataAsString(self);
+    return originalText;
+}
+
+- (NSString *)_web_userVisibleString
+{
+ NSData *data = [self _web_originalData];
+ const unsigned char *before = [data bytes];
+ int length = [data length];
+
+ bool needsHostNameDecoding = false;
+
+ const unsigned char *p = before;
+ int bufferLength = (length * 3) + 1;
+ char *after = malloc(bufferLength); // large enough to %-escape every character
+ char *q = after;
+ int i;
+ for (i = 0; i < length; i++) {
+ unsigned char c = p[i];
+ // escape control characters, space, and delete
+ if (c <= 0x20 || c == 0x7f) {
+ *q++ = '%';
+ *q++ = hexDigit(c >> 4);
+ *q++ = hexDigit(c & 0xf);
+ }
+ // unescape escape sequences that indicate bytes greater than 0x7f
+ else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
+ unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
+ if (u > 0x7f) {
+ // unescape
+ *q++ = u;
+ }
+ else {
+ // do not unescape
+ *q++ = p[i];
+ *q++ = p[i + 1];
+ *q++ = p[i + 2];
+ }
+ i += 2;
+ }
+ else {
+ *q++ = c;
+
+ // Check for "xn--" in an efficient, non-case-sensitive, way.
+ &nbs