Commit 8291e207 authored by mitz@apple.com

WebCore:

        Reviewed by Darin Adler.

        - fix <rdar://problem/5825683> Three slash URLs are modified by WebKit

        Test: fast/loader/url-parse-1.html

        URIs in which the scheme is followed by "://" are ambiguous, because
        "//" can be either the beginning of a net_path or the beginning of an
        abs_path whose first path segment is empty. In the case of ":///", the
        net_path interpretation is invalid, because the authority (the part
        between the second and third slashes) cannot be empty. However, for
        historical reasons, this is allowed in http:, https: and file: URLs,
        in which an empty authority means the local host.

        * platform/KURL.cpp:
        (WebCore::KURL::parse): Changed to interpret URLs in which the scheme
        is followed by ":///" as abs_path-only URLs, unless the scheme is http,
        https or file.

LayoutTests:

        Reviewed by Darin Adler.

        - test for <rdar://problem/5825683> Three slash URLs are modified by WebKit

        * fast/loader/url-parse-1-expected.txt: Added.
        * fast/loader/url-parse-1.html: Added.



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@34579 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 980c614d
2008-06-15 Dan Bernstein <mitz@apple.com>
Reviewed by Darin Adler.
- test for <rdar://problem/5825683> Three slash URLs are modified by WebKit
* fast/loader/url-parse-1-expected.txt: Added.
* fast/loader/url-parse-1.html: Added.
2008-06-15 Darin Adler <darin@apple.com>
Reviewed and tweaked by Sam Weinig.
The following table shows how URLs are parsed and canonicalized. The base URI for this document is file:///BASE/.
URL href host pathname
file:///BASE/ /BASE/
test file:///BASE/test /BASE/test
/ file:/// /
/test file:///test /test
// file://
//test file://test test
/// file:/// /
///test file:///test /test
file: file:///BASE/ /BASE/
file:test file:///BASE/test /BASE/test
file:/ file:/// /
file:/test file:///test /test
file:// file://
file://test file://test test
file:/// file:/// /
file:///test file:///test /test
file://localhost file://localhost
file://localhost/ file:/// /
file://localhost/test file:///test /test
http: http:/ /
http:/ http:/ /
http:// http:// /
http:/// http:/// /
http://// http://// //
http://localhost http://localhost/ localhost /
http://localhost/ http://localhost/ localhost /
http://localhost/test http://localhost/test localhost /test
x-webkit: x-webkit:
x-webkit:test x-webkit:test test
x-webkit:/ x-webkit:/ /
x-webkit:/test x-webkit:/test /test
x-webkit:// x-webkit:// //
x-webkit://test x-webkit://test test
x-webkit:/// x-webkit:/// ///
x-webkit:///test x-webkit:///test ///test
<head>
<base href="file:///BASE/">
<style>
table { border-spacing: 0; border-collapse: collapse; border: 1px solid #999; }
thead { font: x-small 'Lucida Grande'; }
thead > tr { background-color: #dee; border-bottom: 1px solid #999; }
td, th { padding: 4px; border-width: 0 1px; border-style: solid; border-color: #999 }
tbody { font: small monospace; }
tbody > tr:nth-child(odd) { background-color: #eee; }
</style>
</head>
<body>
<p>
The following table shows how URLs are parsed and canonicalized. The base URI
for this document is <tt>file:///BASE/</tt>.
</p>
<table>
<thead>
<tr>
<th>URL</th>
<th>href</th>
<th>host</th>
<th>pathname</th>
</tr>
</thead>
<tbody id = "results"></tbody>
</table>
<script>
// When the layoutTestController object is available on the window, ask it to
// dump this test's result as text.
if (window.layoutTestController) {
    layoutTestController.dumpAsText();
}
// Appends one row to the "results" table body for the given URL string.
// The row has four cells:
//   1. an anchor whose href is set to `url`, with `url` as its link text;
//   2. the anchor's href as read back from the element;
//   3. the anchor's host;
//   4. the anchor's pathname.
function test(url)
{
    var link = document.createElement("a");
    link.href = url;

    var resultsBody = document.getElementById("results");
    var resultRow = resultsBody.appendChild(document.createElement("tr"));

    // First cell: the link itself, labelled with the URL exactly as given.
    var urlCell = resultRow.appendChild(document.createElement("td"));
    urlCell.appendChild(link).appendChild(document.createTextNode(url));

    // Remaining cells: the values the anchor reports, in column order.
    var derivedValues = [link.href, link.host, link.pathname];
    for (var column = 0; column < derivedValues.length; ++column) {
        var valueCell = resultRow.appendChild(document.createElement("td"));
        valueCell.appendChild(document.createTextNode(derivedValues[column]));
    }
}
// URL strings fed to test(), one table row per entry, in this order.
var testCases = [
    // Relative references (no scheme).
    "", "test",
    "/", "/test",
    "//", "//test",
    "///", "///test",

    // file: with zero to three slashes after the scheme.
    "file:", "file:test",
    "file:/", "file:/test",
    "file://", "file://test",
    "file:///", "file:///test",

    // file: with an explicit localhost authority.
    "file://localhost", "file://localhost/", "file://localhost/test",

    // http: with zero to four slashes, then with a localhost authority.
    "http:", "http:/", "http://", "http:///", "http:////",
    "http://localhost", "http://localhost/", "http://localhost/test",

    // x-webkit: with zero to three slashes after the scheme.
    "x-webkit:", "x-webkit:test",
    "x-webkit:/", "x-webkit:/test",
    "x-webkit://", "x-webkit://test",
    "x-webkit:///", "x-webkit:///test",
];

// Run every case in order, producing one result row per URL.
var i = 0;
while (i < testCases.length) {
    test(testCases[i]);
    ++i;
}
</script>
</body>
2008-06-15 Dan Bernstein <mitz@apple.com>
Reviewed by Darin Adler.
- fix <rdar://problem/5825683> Three slash URLs are modified by WebKit
Test: fast/loader/url-parse-1.html
URIs in which the scheme is followed by "://" are ambiguous, because
"//" can be either the beginning of a net_path or the beginning of an
abs_path whose first path segment is empty. In the case of ":///", the
net_path interpretation is invalid, because the authority (the part
between the second and third slashes) cannot be empty. However, for
historical reasons, this is allowed in http:, https: and file: URLs,
in which an empty authority means the local host.
* platform/KURL.cpp:
(WebCore::KURL::parse): Changed to interpret URLs in which the scheme
is followed by ":///" as abs_path-only URLs, unless the scheme is http,
https or file.
2008-06-15 Darin Adler <darin@apple.com>
Rubber stamped by Sam.
......@@ -978,8 +978,21 @@ void KURL::parse(const char* url, const String* originalString)
bool hierarchical = url[schemeEnd + 1] == '/';
bool isFile = schemeEnd == 4
&& matchLetter(url[0], 'f')
&& matchLetter(url[1], 'i')
&& matchLetter(url[2], 'l')
&& matchLetter(url[3], 'e');
bool isHTTPorHTTPS = matchLetter(url[0], 'h')
&& matchLetter(url[1], 't')
&& matchLetter(url[2], 't')
&& matchLetter(url[3], 'p')
&& (url[4] == ':' || (matchLetter(url[4], 's') && url[5] == ':'));
if (hierarchical && url[schemeEnd + 2] == '/') {
// part after the scheme must be a net_path, parse the authority section
// The part after the scheme is either a net_path or an abs_path whose first path segment is empty.
// Attempt to find an authority.
// FIXME: Authority characters may be scanned twice, and it would be nice to be faster.
userStart += 2;
......@@ -998,9 +1011,9 @@ void KURL::parse(const char* url, const String* originalString)
passwordEnd = userEnd;
userEnd = colonPos;
passwordStart = colonPos + 1;
} else {
} else
passwordStart = passwordEnd = userEnd;
}
hostStart = passwordEnd + 1;
} else if (url[userEnd] == '[' || isPathSegmentEndChar(url[userEnd])) {
// hit the end of the authority, must have been no user
......@@ -1043,9 +1056,8 @@ void KURL::parse(const char* url, const String* originalString)
portEnd = portStart;
while (isASCIIDigit(url[portEnd]))
portEnd++;
} else {
} else
portStart = portEnd = hostEnd;
}
if (!isPathSegmentEndChar(url[portEnd])) {
// invalid character
......@@ -1053,6 +1065,19 @@ void KURL::parse(const char* url, const String* originalString)
invalidate();
return;
}
if (userStart == portEnd && !isHTTPorHTTPS && !isFile) {
// No authority found, which means that this is not a net_path, but rather an abs_path whose first two
// path segments are empty. For file, http and https only, an empty authority is allowed.
userStart -= 2;
userEnd = userStart;
passwordStart = userEnd;
passwordEnd = passwordStart;
hostStart = passwordEnd;
hostEnd = hostStart;
portStart = hostEnd;
portEnd = hostEnd;
}
} else {
// the part after the scheme must be an opaque_part or an abs_path
userEnd = userStart;
......@@ -1095,12 +1120,6 @@ void KURL::parse(const char* url, const String* originalString)
*p++ = *strPtr++;
m_schemeEnd = p - buffer.data();
bool isHTTPorHTTPS = matchLetter(url[0], 'h')
&& matchLetter(url[1], 't')
&& matchLetter(url[2], 't')
&& matchLetter(url[3], 'p')
&& (url[4] == ':' || (matchLetter(url[4], 's') && url[5] == ':'));
bool hostIsLocalHost = portEnd - userStart == 9
&& matchLetter(url[userStart], 'l')
&& matchLetter(url[userStart+1], 'o')
......@@ -1112,12 +1131,6 @@ void KURL::parse(const char* url, const String* originalString)
&& matchLetter(url[userStart+7], 's')
&& matchLetter(url[userStart+8], 't');
bool isFile = matchLetter(url[0], 'f')
&& matchLetter(url[1], 'i')
&& matchLetter(url[2], 'l')
&& matchLetter(url[3], 'e')
&& url[4] == ':';
// File URLs need a host part unless it is just file:// or file://localhost
bool degenFilePath = pathStart == pathEnd && (hostStart == hostEnd || hostIsLocalHost);
......@@ -1172,9 +1185,8 @@ void KURL::parse(const char* url, const String* originalString)
*p++ = *strPtr++;
}
m_portEnd = p - buffer.data();
} else {
} else
m_userStart = m_userEnd = m_passwordEnd = m_hostEnd = m_portEnd = p - buffer.data();
}
// For canonicalization, ensure we have a '/' for no path.
// Only do this for http and https.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment