mirror of
https://github.com/darlinghq/darling-WTF.git
synced 2024-11-26 21:30:30 +00:00
1160 lines
32 KiB
C++
1160 lines
32 KiB
C++
/*
|
|
* Copyright (C) 2004-2020 Apple Inc. All rights reserved.
|
|
* Copyright (C) 2012 Research In Motion Limited. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include <wtf/URL.h>
|
|
|
|
#include "URLParser.h"
|
|
#include <stdio.h>
|
|
#include <unicode/uidna.h>
|
|
#include <wtf/HashMap.h>
|
|
#include <wtf/NeverDestroyed.h>
|
|
#include <wtf/PrintStream.h>
|
|
#include <wtf/StdLibExtras.h>
|
|
#include <wtf/UUID.h>
|
|
#include <wtf/text/CString.h>
|
|
#include <wtf/text/StringBuilder.h>
|
|
#include <wtf/text/StringHash.h>
|
|
#include <wtf/text/TextStream.h>
|
|
|
|
namespace WTF {
|
|
|
|
void URL::invalidate()
|
|
{
|
|
m_isValid = false;
|
|
m_protocolIsInHTTPFamily = false;
|
|
m_cannotBeABaseURL = false;
|
|
m_schemeEnd = 0;
|
|
m_userStart = 0;
|
|
m_userEnd = 0;
|
|
m_passwordEnd = 0;
|
|
m_hostEnd = 0;
|
|
m_portLength = 0;
|
|
m_pathEnd = 0;
|
|
m_pathAfterLastSlash = 0;
|
|
m_queryEnd = 0;
|
|
}
|
|
|
|
URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding)
|
|
{
|
|
*this = URLParser(relative, base, encoding).result();
|
|
}
|
|
|
|
static bool shouldTrimFromURL(UChar character)
|
|
{
|
|
// Ignore leading/trailing whitespace and control characters.
|
|
return character <= ' ';
|
|
}
|
|
|
|
URL URL::isolatedCopy() const
|
|
{
|
|
URL result = *this;
|
|
result.m_string = result.m_string.isolatedCopy();
|
|
return result;
|
|
}
|
|
|
|
StringView URL::lastPathComponent() const
|
|
{
|
|
if (!hasPath())
|
|
return { };
|
|
|
|
unsigned end = m_pathEnd - 1;
|
|
if (m_string[end] == '/')
|
|
--end;
|
|
|
|
size_t start = m_string.reverseFind('/', end);
|
|
if (start < pathStart())
|
|
return { };
|
|
++start;
|
|
|
|
return StringView(m_string).substring(start, end - start + 1);
|
|
}
|
|
|
|
bool URL::hasSpecialScheme() const
|
|
{
|
|
// https://url.spec.whatwg.org/#special-scheme
|
|
return protocolIs("ftp")
|
|
|| protocolIs("file")
|
|
|| protocolIs("http")
|
|
|| protocolIs("https")
|
|
|| protocolIs("ws")
|
|
|| protocolIs("wss");
|
|
}
|
|
|
|
unsigned URL::pathStart() const
|
|
{
|
|
unsigned start = m_hostEnd + m_portLength;
|
|
if (start == m_schemeEnd + 1U
|
|
&& start + 1 < m_string.length()
|
|
&& m_string[start] == '/' && m_string[start + 1] == '.')
|
|
start += 2;
|
|
return start;
|
|
}
|
|
|
|
StringView URL::protocol() const
|
|
{
|
|
if (!m_isValid)
|
|
return { };
|
|
|
|
return StringView(m_string).substring(0, m_schemeEnd);
|
|
}
|
|
|
|
StringView URL::host() const
|
|
{
|
|
if (!m_isValid)
|
|
return { };
|
|
|
|
unsigned start = hostStart();
|
|
return StringView(m_string).substring(start, m_hostEnd - start);
|
|
}
|
|
|
|
Optional<uint16_t> URL::port() const
|
|
{
|
|
return m_portLength ? parseUInt16(StringView(m_string).substring(m_hostEnd + 1, m_portLength - 1)) : WTF::nullopt;
|
|
}
|
|
|
|
String URL::hostAndPort() const
|
|
{
|
|
if (auto port = this->port())
|
|
return makeString(host(), ':', port.value());
|
|
return host().toString();
|
|
}
|
|
|
|
String URL::protocolHostAndPort() const
|
|
{
|
|
if (!hasCredentials())
|
|
return m_string.substring(0, pathStart());
|
|
|
|
return makeString(
|
|
StringView(m_string).substring(0, m_userStart),
|
|
StringView(m_string).substring(hostStart(), pathStart() - hostStart())
|
|
);
|
|
}
|
|
|
|
static Optional<LChar> decodeEscapeSequence(StringView input, unsigned index, unsigned length)
|
|
{
|
|
if (index + 3 > length || input[index] != '%')
|
|
return WTF::nullopt;
|
|
auto digit1 = input[index + 1];
|
|
auto digit2 = input[index + 2];
|
|
if (!isASCIIHexDigit(digit1) || !isASCIIHexDigit(digit2))
|
|
return WTF::nullopt;
|
|
return toASCIIHexValue(digit1, digit2);
|
|
}
|
|
|
|
static String decodeEscapeSequencesFromParsedURL(StringView input)
|
|
{
|
|
ASSERT(input.isAllASCII());
|
|
|
|
auto length = input.length();
|
|
if (length < 3 || !input.contains('%'))
|
|
return input.toString();
|
|
|
|
// FIXME: This 100 is arbitrary. Should make a histogram of how this function is actually used to choose a better value.
|
|
Vector<LChar, 100> percentDecoded;
|
|
percentDecoded.reserveInitialCapacity(length);
|
|
for (unsigned i = 0; i < length; ) {
|
|
if (auto decodedCharacter = decodeEscapeSequence(input, i, length)) {
|
|
percentDecoded.uncheckedAppend(*decodedCharacter);
|
|
i += 3;
|
|
} else {
|
|
percentDecoded.uncheckedAppend(input[i]);
|
|
++i;
|
|
}
|
|
}
|
|
|
|
// FIXME: Is UTF-8 always the correct encoding?
|
|
// FIXME: This returns a null string when we encounter an invalid UTF-8 sequence. Is that OK?
|
|
return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
|
|
}
|
|
|
|
String URL::user() const
|
|
{
|
|
return decodeEscapeSequencesFromParsedURL(encodedUser());
|
|
}
|
|
|
|
String URL::password() const
|
|
{
|
|
return decodeEscapeSequencesFromParsedURL(encodedPassword());
|
|
}
|
|
|
|
StringView URL::encodedUser() const
|
|
{
|
|
return StringView(m_string).substring(m_userStart, m_userEnd - m_userStart);
|
|
}
|
|
|
|
StringView URL::encodedPassword() const
|
|
{
|
|
if (m_passwordEnd == m_userEnd)
|
|
return { };
|
|
|
|
return StringView(m_string).substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1);
|
|
}
|
|
|
|
StringView URL::fragmentIdentifier() const
|
|
{
|
|
if (!hasFragmentIdentifier())
|
|
return { };
|
|
|
|
return StringView(m_string).substring(m_queryEnd + 1);
|
|
}
|
|
|
|
URL URL::truncatedForUseAsBase() const
|
|
{
|
|
return URL(URL(), m_string.left(m_pathAfterLastSlash));
|
|
}
|
|
|
|
#if !USE(CF)
|
|
|
|
String URL::fileSystemPath() const
|
|
{
|
|
if (!isLocalFile())
|
|
return { };
|
|
|
|
auto result = decodeEscapeSequencesFromParsedURL(path());
|
|
#if PLATFORM(WIN)
|
|
result = fileSystemRepresentation(result);
|
|
#endif
|
|
return result;
|
|
}
|
|
|
|
#endif
|
|
|
|
#if !ASSERT_ENABLED
|
|
|
|
static inline void assertProtocolIsGood(StringView)
|
|
{
|
|
}
|
|
|
|
#else
|
|
|
|
static void assertProtocolIsGood(StringView protocol)
|
|
{
|
|
// FIXME: We probably don't need this function any more.
|
|
// The isASCIIAlphaCaselessEqual function asserts that passed-in characters
|
|
// are ones it can handle; the older code did not and relied on these checks.
|
|
for (auto character : protocol.codeUnits()) {
|
|
ASSERT(isASCII(character));
|
|
ASSERT(character > ' ');
|
|
ASSERT(!isASCIIUpper(character));
|
|
ASSERT(toASCIILowerUnchecked(character) == character);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
static Lock defaultPortForProtocolMapForTestingLock;
|
|
|
|
using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>;
|
|
static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting()
|
|
{
|
|
static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap;
|
|
return defaultPortForProtocolMap;
|
|
}
|
|
|
|
static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting()
|
|
{
|
|
DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting();
|
|
if (!defaultPortForProtocolMap)
|
|
defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting;
|
|
return *defaultPortForProtocolMap;
|
|
}
|
|
|
|
void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol)
|
|
{
|
|
auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
|
|
ensureDefaultPortForProtocolMapForTesting().add(protocol, port);
|
|
}
|
|
|
|
void clearDefaultPortForProtocolMapForTesting()
|
|
{
|
|
auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
|
|
if (auto* map = defaultPortForProtocolMapForTesting())
|
|
map->clear();
|
|
}
|
|
|
|
Optional<uint16_t> defaultPortForProtocol(StringView protocol)
|
|
{
|
|
if (auto* overrideMap = defaultPortForProtocolMapForTesting()) {
|
|
auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
|
|
ASSERT(overrideMap); // No need to null check again here since overrideMap cannot become null after being non-null.
|
|
auto iterator = overrideMap->find(protocol.toStringWithoutCopying());
|
|
if (iterator != overrideMap->end())
|
|
return iterator->value;
|
|
}
|
|
return URLParser::defaultPortForProtocol(protocol);
|
|
}
|
|
|
|
bool isDefaultPortForProtocol(uint16_t port, StringView protocol)
|
|
{
|
|
return defaultPortForProtocol(protocol) == port;
|
|
}
|
|
|
|
bool URL::protocolIsJavaScript() const
|
|
{
|
|
return WTF::protocolIsJavaScript(string());
|
|
}
|
|
|
|
bool URL::protocolIs(const char* protocol) const
|
|
{
|
|
assertProtocolIsGood(protocol);
|
|
|
|
// JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
|
|
// The free function protocolIsJavaScript() should be used instead.
|
|
ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript"));
|
|
|
|
if (!m_isValid)
|
|
return false;
|
|
|
|
// Do the comparison without making a new string object.
|
|
for (unsigned i = 0; i < m_schemeEnd; ++i) {
|
|
if (!protocol[i] || !isASCIIAlphaCaselessEqual(m_string[i], protocol[i]))
|
|
return false;
|
|
}
|
|
return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
|
|
}
|
|
|
|
bool URL::protocolIs(StringView protocol) const
|
|
{
|
|
assertProtocolIsGood(protocol);
|
|
|
|
if (!m_isValid)
|
|
return false;
|
|
|
|
if (m_schemeEnd != protocol.length())
|
|
return false;
|
|
|
|
// Do the comparison without making a new string object.
|
|
for (unsigned i = 0; i < m_schemeEnd; ++i) {
|
|
if (!isASCIIAlphaCaselessEqual(m_string[i], protocol[i]))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
StringView URL::query() const
|
|
{
|
|
if (m_queryEnd == m_pathEnd)
|
|
return { };
|
|
|
|
return StringView(m_string).substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1));
|
|
}
|
|
|
|
StringView URL::path() const
|
|
{
|
|
if (!m_isValid)
|
|
return { };
|
|
|
|
return StringView(m_string).substring(pathStart(), m_pathEnd - pathStart());
|
|
}
|
|
|
|
bool URL::setProtocol(StringView newProtocol)
|
|
{
|
|
// Firefox and IE remove everything after the first ':'.
|
|
auto newProtocolPrefix = newProtocol.substring(0, newProtocol.find(':'));
|
|
auto newProtocolCanonicalized = URLParser::maybeCanonicalizeScheme(newProtocolPrefix.toStringWithoutCopying());
|
|
if (!newProtocolCanonicalized)
|
|
return false;
|
|
|
|
if (!m_isValid) {
|
|
parse(makeString(*newProtocolCanonicalized, ':', m_string));
|
|
return true;
|
|
}
|
|
|
|
if ((m_passwordEnd != m_userStart || port()) && *newProtocolCanonicalized == "file")
|
|
return true;
|
|
|
|
if (isLocalFile() && host().isEmpty())
|
|
return true;
|
|
|
|
parse(makeString(*newProtocolCanonicalized, StringView(m_string).substring(m_schemeEnd)));
|
|
return true;
|
|
}
|
|
|
|
// Appends the punycoded hostname identified by the given string and length to
|
|
// the output buffer. The result will not be null terminated.
|
|
// Return value of false means error in encoding.
|
|
static bool appendEncodedHostname(Vector<UChar, 512>& buffer, StringView string)
|
|
{
|
|
// Needs to be big enough to hold an IDN-encoded name.
|
|
// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
|
|
const unsigned hostnameBufferLength = 2048;
|
|
|
|
if (string.length() > hostnameBufferLength || string.isAllASCII()) {
|
|
append(buffer, string);
|
|
return true;
|
|
}
|
|
|
|
UChar hostnameBuffer[hostnameBufferLength];
|
|
UErrorCode error = U_ZERO_ERROR;
|
|
UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER;
|
|
int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(),
|
|
string.upconvertedCharacters(), string.length(), hostnameBuffer, hostnameBufferLength, &processingDetails, &error);
|
|
|
|
if (U_SUCCESS(error) && !processingDetails.errors) {
|
|
buffer.append(hostnameBuffer, numCharactersConverted);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
unsigned URL::hostStart() const
|
|
{
|
|
return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
|
|
}
|
|
|
|
unsigned URL::credentialsEnd() const
|
|
{
|
|
// Include '@' too if we have it.
|
|
unsigned end = m_passwordEnd;
|
|
if (end != m_hostEnd && m_string[end] == '@')
|
|
end += 1;
|
|
return end;
|
|
}
|
|
|
|
static bool forwardSlashHashOrQuestionMark(UChar c)
|
|
{
|
|
return c == '/'
|
|
|| c == '#'
|
|
|| c == '?';
|
|
}
|
|
|
|
static bool slashHashOrQuestionMark(UChar c)
|
|
{
|
|
return forwardSlashHashOrQuestionMark(c) || c == '\\';
|
|
}
|
|
|
|
void URL::setHost(StringView newHost)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
if (newHost.contains(':') && !newHost.startsWith('['))
|
|
return;
|
|
|
|
if (auto index = newHost.find(hasSpecialScheme() ? slashHashOrQuestionMark : forwardSlashHashOrQuestionMark); index != notFound)
|
|
newHost = newHost.substring(0, index);
|
|
|
|
Vector<UChar, 512> encodedHostName;
|
|
if (!appendEncodedHostname(encodedHostName, newHost))
|
|
return;
|
|
|
|
bool slashSlashNeeded = m_userStart == m_schemeEnd + 1U;
|
|
parse(makeString(
|
|
StringView(m_string).left(hostStart()),
|
|
slashSlashNeeded ? "//" : "",
|
|
StringView(encodedHostName.data(), encodedHostName.size()),
|
|
StringView(m_string).substring(m_hostEnd)
|
|
));
|
|
}
|
|
|
|
void URL::setPort(Optional<uint16_t> port)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
if (!port) {
|
|
remove(m_hostEnd, m_portLength);
|
|
return;
|
|
}
|
|
|
|
parse(makeString(
|
|
StringView(m_string).left(m_hostEnd),
|
|
':',
|
|
static_cast<unsigned>(*port),
|
|
StringView(m_string).substring(pathStart())
|
|
));
|
|
}
|
|
|
|
void URL::setHostAndPort(StringView hostAndPort)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
auto hostName = hostAndPort;
|
|
StringView portString;
|
|
auto colonIndex = hostName.reverseFind(':');
|
|
if (colonIndex != notFound) {
|
|
portString = hostName.substring(colonIndex + 1);
|
|
hostName = hostName.substring(0, colonIndex);
|
|
// Multiple colons are acceptable only in case of IPv6.
|
|
if (hostName.contains(':') && !hostName.startsWith('['))
|
|
return;
|
|
if (!parseUInt16(portString))
|
|
portString = { };
|
|
}
|
|
if (hostName.isEmpty()) {
|
|
remove(hostStart(), pathStart() - hostStart());
|
|
return;
|
|
}
|
|
|
|
Vector<UChar, 512> encodedHostName;
|
|
if (!appendEncodedHostname(encodedHostName, hostName))
|
|
return;
|
|
|
|
bool slashSlashNeeded = m_userStart == m_schemeEnd + 1U;
|
|
parse(makeString(
|
|
StringView(m_string).left(hostStart()),
|
|
slashSlashNeeded ? "//" : "",
|
|
StringView(encodedHostName.data(), encodedHostName.size()),
|
|
portString.isEmpty() ? "" : ":",
|
|
portString,
|
|
StringView(m_string).substring(pathStart())
|
|
));
|
|
}
|
|
|
|
static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
|
|
{
|
|
auto encode = [shouldEncode] (const String& input) {
|
|
CString utf8 = input.utf8();
|
|
auto* data = utf8.data();
|
|
StringBuilder builder;
|
|
auto length = utf8.length();
|
|
for (unsigned j = 0; j < length; j++) {
|
|
auto c = data[j];
|
|
if (shouldEncode(c)) {
|
|
builder.append('%');
|
|
builder.append(upperNibbleToASCIIHexDigit(c));
|
|
builder.append(lowerNibbleToASCIIHexDigit(c));
|
|
} else
|
|
builder.append(c);
|
|
}
|
|
return builder.toString();
|
|
};
|
|
|
|
for (size_t i = 0; i < input.length(); ++i) {
|
|
if (UNLIKELY(shouldEncode(input[i])))
|
|
return encode(input);
|
|
}
|
|
return input;
|
|
}
|
|
|
|
void URL::parse(const String& string)
|
|
{
|
|
*this = URLParser(string).result();
|
|
}
|
|
|
|
void URL::remove(unsigned start, unsigned length)
|
|
{
|
|
if (!length)
|
|
return;
|
|
ASSERT(start < m_string.length());
|
|
ASSERT(length <= m_string.length() - start);
|
|
|
|
auto stringAfterRemoval = WTFMove(m_string);
|
|
stringAfterRemoval.remove(start, length);
|
|
parse(stringAfterRemoval);
|
|
}
|
|
|
|
void URL::setUser(StringView newUser)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
unsigned end = m_userEnd;
|
|
if (!newUser.isEmpty()) {
|
|
bool slashSlashNeeded = m_userStart == m_schemeEnd + 1U;
|
|
bool needSeparator = end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@');
|
|
parse(makeString(
|
|
StringView(m_string).left(m_userStart),
|
|
slashSlashNeeded ? "//" : "",
|
|
percentEncodeCharacters(newUser.toStringWithoutCopying(), URLParser::isInUserInfoEncodeSet),
|
|
needSeparator ? "@" : "",
|
|
StringView(m_string).substring(end)
|
|
));
|
|
} else {
|
|
// Remove '@' if we now have neither user nor password.
|
|
if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@')
|
|
end += 1;
|
|
remove(m_userStart, end - m_userStart);
|
|
}
|
|
}
|
|
|
|
void URL::setPassword(StringView newPassword)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
if (!newPassword.isEmpty()) {
|
|
bool needLeadingSlashes = m_userEnd == m_schemeEnd + 1U;
|
|
parse(makeString(
|
|
StringView(m_string).left(m_userEnd),
|
|
needLeadingSlashes ? "//:" : ":",
|
|
percentEncodeCharacters(newPassword.toStringWithoutCopying(), URLParser::isInUserInfoEncodeSet),
|
|
'@',
|
|
StringView(m_string).substring(credentialsEnd())
|
|
));
|
|
} else {
|
|
unsigned end = m_userStart == m_userEnd ? credentialsEnd() : m_passwordEnd;
|
|
remove(m_userEnd, end - m_userEnd);
|
|
}
|
|
}
|
|
|
|
void URL::removeCredentials()
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
remove(m_userStart, credentialsEnd() - m_userStart);
|
|
}
|
|
|
|
void URL::setFragmentIdentifier(StringView identifier)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
parse(makeString(StringView(m_string).left(m_queryEnd), '#', identifier));
|
|
}
|
|
|
|
void URL::removeFragmentIdentifier()
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
m_string = m_string.left(m_queryEnd);
|
|
}
|
|
|
|
void URL::removeQueryAndFragmentIdentifier()
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
m_string = m_string.left(m_pathEnd);
|
|
m_queryEnd = m_pathEnd;
|
|
}
|
|
|
|
void URL::setQuery(StringView newQuery)
|
|
{
|
|
// FIXME: Consider renaming this function to setEncodedQuery and/or calling percentEncodeCharacters the way setPath does.
|
|
// https://webkit.org/b/161176
|
|
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
parse(makeString(
|
|
StringView(m_string).left(m_pathEnd),
|
|
(!newQuery.startsWith('?') && !newQuery.isNull()) ? "?" : "",
|
|
newQuery,
|
|
StringView(m_string).substring(m_queryEnd)
|
|
));
|
|
}
|
|
|
|
static String escapePathWithoutCopying(StringView path)
|
|
{
|
|
auto questionMarkOrNumberSign = [] (UChar character) {
|
|
return character == '?' || character == '#';
|
|
};
|
|
return percentEncodeCharacters(path.toStringWithoutCopying(), questionMarkOrNumberSign);
|
|
}
|
|
|
|
void URL::setPath(StringView path)
|
|
{
|
|
if (!m_isValid)
|
|
return;
|
|
|
|
parse(makeString(
|
|
StringView(m_string).left(pathStart()),
|
|
path.startsWith('/') || (path.startsWith('\\') && (hasSpecialScheme() || protocolIs("file"))) ? "" : "/",
|
|
escapePathWithoutCopying(path),
|
|
StringView(m_string).substring(m_pathEnd)
|
|
));
|
|
}
|
|
|
|
StringView URL::stringWithoutQueryOrFragmentIdentifier() const
|
|
{
|
|
if (!m_isValid)
|
|
return m_string;
|
|
|
|
return StringView(m_string).left(pathEnd());
|
|
}
|
|
|
|
StringView URL::stringWithoutFragmentIdentifier() const
|
|
{
|
|
if (!m_isValid)
|
|
return m_string;
|
|
|
|
return StringView(m_string).left(m_queryEnd);
|
|
}
|
|
|
|
bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
|
|
{
|
|
return a.stringWithoutFragmentIdentifier() == b.stringWithoutFragmentIdentifier();
|
|
}
|
|
|
|
bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
|
|
{
|
|
if (a.m_schemeEnd != b.m_schemeEnd)
|
|
return false;
|
|
|
|
unsigned hostStartA = a.hostStart();
|
|
unsigned hostLengthA = a.m_hostEnd - hostStartA;
|
|
unsigned hostStartB = b.hostStart();
|
|
unsigned hostLengthB = b.m_hostEnd - b.hostStart();
|
|
if (hostLengthA != hostLengthB)
|
|
return false;
|
|
|
|
// Check the scheme
|
|
for (unsigned i = 0; i < a.m_schemeEnd; ++i) {
|
|
if (a.string()[i] != b.string()[i])
|
|
return false;
|
|
}
|
|
|
|
// And the host
|
|
for (unsigned i = 0; i < hostLengthA; ++i) {
|
|
if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
|
|
return false;
|
|
}
|
|
|
|
if (a.port() != b.port())
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool URL::isMatchingDomain(StringView domain) const
|
|
{
|
|
// FIXME: Consider moving this to an appropriate place in WebCore's plug-in code; don't want people tempted to use this instead of SecurityOrigin.
|
|
|
|
if (isNull())
|
|
return false;
|
|
|
|
if (domain.isEmpty())
|
|
return true;
|
|
|
|
if (!protocolIsInHTTPFamily())
|
|
return false;
|
|
|
|
auto host = this->host();
|
|
if (!host.endsWith(domain))
|
|
return false;
|
|
|
|
return host.length() == domain.length() || host[host.length() - domain.length() - 1] == '.';
|
|
}
|
|
|
|
// FIXME: Rename this so it's clear that it does the appropriate escaping for URL query field values.
|
|
String encodeWithURLEscapeSequences(const String& input)
|
|
{
|
|
return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
|
|
}
|
|
|
|
bool URL::isHierarchical() const
|
|
{
|
|
if (!m_isValid)
|
|
return false;
|
|
ASSERT(m_string[m_schemeEnd] == ':');
|
|
return m_string[m_schemeEnd + 1] == '/';
|
|
}
|
|
|
|
static bool protocolIsInternal(StringView string, const char* protocol)
|
|
{
|
|
assertProtocolIsGood(protocol);
|
|
bool isLeading = true;
|
|
for (auto codeUnit : string.codeUnits()) {
|
|
if (isLeading) {
|
|
// Skip leading whitespace and control characters.
|
|
if (shouldTrimFromURL(codeUnit))
|
|
continue;
|
|
isLeading = false;
|
|
} else {
|
|
// Skip tabs and newlines even later in the protocol.
|
|
if (codeUnit == '\t' || codeUnit == '\r' || codeUnit == '\n')
|
|
continue;
|
|
}
|
|
|
|
char expectedCharacter = *protocol++;
|
|
if (!expectedCharacter)
|
|
return codeUnit == ':';
|
|
if (!isASCIIAlphaCaselessEqual(codeUnit, expectedCharacter))
|
|
return false;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool protocolIs(StringView string, const char* protocol)
|
|
{
|
|
return protocolIsInternal(string, protocol);
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
|
|
void URL::print() const
|
|
{
|
|
printf("%s\n", m_string.utf8().data());
|
|
}
|
|
|
|
#endif
|
|
|
|
void URL::dump(PrintStream& out) const
|
|
{
|
|
out.print(m_string);
|
|
}
|
|
|
|
String URL::strippedForUseAsReferrer() const
|
|
{
|
|
if (!m_isValid)
|
|
return m_string;
|
|
|
|
unsigned end = credentialsEnd();
|
|
|
|
if (m_userStart == end && m_queryEnd == m_string.length())
|
|
return m_string;
|
|
|
|
return makeString(
|
|
StringView(m_string).substring(0, m_userStart),
|
|
StringView(m_string).substring(end, m_queryEnd - end)
|
|
);
|
|
}
|
|
|
|
bool URL::isLocalFile() const
|
|
{
|
|
// Including feed here might be a bad idea since drag and drop uses this check
|
|
// and including feed would allow feeds to potentially let someone's blog
|
|
// read the contents of the clipboard on a drag, even without a drop.
|
|
// Likewise with using the FrameLoader::shouldTreatURLAsLocal() function.
|
|
return protocolIs("file");
|
|
}
|
|
|
|
bool protocolIsJavaScript(StringView string)
|
|
{
|
|
return protocolIsInternal(string, "javascript");
|
|
}
|
|
|
|
bool protocolIsInHTTPFamily(StringView url)
|
|
{
|
|
auto length = url.length();
|
|
// Do the comparison without making a new string object.
|
|
return length >= 5
|
|
&& isASCIIAlphaCaselessEqual(url[0], 'h')
|
|
&& isASCIIAlphaCaselessEqual(url[1], 't')
|
|
&& isASCIIAlphaCaselessEqual(url[2], 't')
|
|
&& isASCIIAlphaCaselessEqual(url[3], 'p')
|
|
&& (url[4] == ':' || (isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':'));
|
|
}
|
|
|
|
const URL& aboutBlankURL()
|
|
{
|
|
static LazyNeverDestroyed<URL> staticBlankURL;
|
|
static std::once_flag onceFlag;
|
|
std::call_once(onceFlag, [&] {
|
|
static constexpr const char* aboutBlank = "about:blank";
|
|
staticBlankURL.construct(URL(), StringImpl::createStaticStringImpl(aboutBlank, strlen(aboutBlank)));
|
|
});
|
|
return staticBlankURL;
|
|
}
|
|
|
|
const URL& aboutSrcDocURL()
|
|
{
|
|
static LazyNeverDestroyed<URL> staticSrcDocURL;
|
|
static std::once_flag onceFlag;
|
|
std::call_once(onceFlag, [&] {
|
|
static constexpr const char* aboutSrcDoc = "about:srcdoc";
|
|
staticSrcDocURL.construct(URL(), StringImpl::createStaticStringImpl(aboutSrcDoc, strlen(aboutSrcDoc)));
|
|
});
|
|
return staticSrcDocURL;
|
|
}
|
|
|
|
bool URL::protocolIsAbout() const
|
|
{
|
|
return protocolIs("about");
|
|
}
|
|
|
|
bool portAllowed(const URL& url)
|
|
{
|
|
Optional<uint16_t> port = url.port();
|
|
|
|
// Since most URLs don't have a port, return early for the "no port" case.
|
|
if (!port)
|
|
return true;
|
|
|
|
// This blocked port list matches the port blocking that Mozilla implements.
|
|
// See http://www.mozilla.org/projects/netlib/PortBanning.html for more information.
|
|
static const uint16_t blockedPortList[] = {
|
|
1, // tcpmux
|
|
7, // echo
|
|
9, // discard
|
|
11, // systat
|
|
13, // daytime
|
|
15, // netstat
|
|
17, // qotd
|
|
19, // chargen
|
|
20, // FTP-data
|
|
21, // FTP-control
|
|
22, // SSH
|
|
23, // telnet
|
|
25, // SMTP
|
|
37, // time
|
|
42, // name
|
|
43, // nicname
|
|
53, // domain
|
|
69, // TFTP
|
|
77, // priv-rjs
|
|
79, // finger
|
|
87, // ttylink
|
|
95, // supdup
|
|
101, // hostriame
|
|
102, // iso-tsap
|
|
103, // gppitnp
|
|
104, // acr-nema
|
|
109, // POP2
|
|
110, // POP3
|
|
111, // sunrpc
|
|
113, // auth
|
|
115, // SFTP
|
|
117, // uucp-path
|
|
119, // nntp
|
|
123, // NTP
|
|
135, // loc-srv / epmap
|
|
137, // NetBIOS
|
|
139, // netbios
|
|
143, // IMAP2
|
|
161, // SNMP
|
|
179, // BGP
|
|
389, // LDAP
|
|
427, // SLP (Also used by Apple Filing Protocol)
|
|
465, // SMTP+SSL
|
|
512, // print / exec
|
|
513, // login
|
|
514, // shell
|
|
515, // printer
|
|
526, // tempo
|
|
530, // courier
|
|
531, // Chat
|
|
532, // netnews
|
|
540, // UUCP
|
|
548, // afpovertcp [Apple addition]
|
|
554, // rtsp
|
|
556, // remotefs
|
|
563, // NNTP+SSL
|
|
587, // ESMTP
|
|
601, // syslog-conn
|
|
636, // LDAP+SSL
|
|
993, // IMAP+SSL
|
|
995, // POP3+SSL
|
|
1719, // H323 (RAS)
|
|
1720, // H323 (Q931)
|
|
1723, // H323 (H245)
|
|
2049, // NFS
|
|
3659, // apple-sasl / PasswordServer [Apple addition]
|
|
4045, // lockd
|
|
4190, // ManageSieve [Apple addition]
|
|
5060, // SIP
|
|
5061, // SIPS
|
|
6000, // X11
|
|
6566, // SANE
|
|
6665, // Alternate IRC [Apple addition]
|
|
6666, // Alternate IRC [Apple addition]
|
|
6667, // Standard IRC [Apple addition]
|
|
6668, // Alternate IRC [Apple addition]
|
|
6669, // Alternate IRC [Apple addition]
|
|
6679, // Alternate IRC SSL [Apple addition]
|
|
6697, // IRC+SSL [Apple addition]
|
|
};
|
|
|
|
// If the port is not in the blocked port list, allow it.
|
|
ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList)));
|
|
if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value()))
|
|
return true;
|
|
|
|
// Allow ports 21 and 22 for FTP URLs, as Mozilla does.
|
|
if ((port.value() == 21 || port.value() == 22) && url.protocolIs("ftp"))
|
|
return true;
|
|
|
|
// Allow any port number in a file URL, since the port number is ignored.
|
|
if (url.protocolIs("file"))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
String mimeTypeFromDataURL(StringView dataURL)
|
|
{
|
|
ASSERT(protocolIsInternal(dataURL, "data"));
|
|
|
|
// FIXME: What's the right behavior when the URL has a comma first, but a semicolon later?
|
|
// Currently this code will break at the semicolon in that case; should add a test.
|
|
auto index = dataURL.find(';', 5);
|
|
if (index == notFound)
|
|
index = dataURL.find(',', 5);
|
|
if (index == notFound) {
|
|
// FIXME: There was an old comment here that made it sound like this should be returning text/plain.
|
|
// But we have been returning empty string here for some time, so not changing its behavior at this time.
|
|
return emptyString();
|
|
}
|
|
if (index == 5)
|
|
return "text/plain"_s;
|
|
ASSERT(index >= 5);
|
|
return dataURL.substring(5, index - 5).convertToASCIILowercase();
|
|
}
|
|
|
|
String URL::stringCenterEllipsizedToLength(unsigned length) const
|
|
{
|
|
if (m_string.length() <= length)
|
|
return m_string;
|
|
|
|
return makeString(StringView(m_string).left(length / 2 - 1), "...", StringView(m_string).right(length / 2 - 2));
|
|
}
|
|
|
|
URL URL::fakeURLWithRelativePart(StringView relativePart)
|
|
{
|
|
return URL(URL(), makeString("webkit-fake-url://", createCanonicalUUIDString(), '/', relativePart));
|
|
}
|
|
|
|
URL URL::fileURLWithFileSystemPath(StringView path)
|
|
{
|
|
return URL(URL(), makeString(
|
|
"file://",
|
|
path.startsWith('/') ? "" : "/",
|
|
escapePathWithoutCopying(path)
|
|
));
|
|
}
|
|
|
|
StringView URL::queryWithLeadingQuestionMark() const
|
|
{
|
|
if (m_queryEnd <= m_pathEnd)
|
|
return { };
|
|
|
|
return StringView(m_string).substring(m_pathEnd, m_queryEnd - m_pathEnd);
|
|
}
|
|
|
|
StringView URL::fragmentIdentifierWithLeadingNumberSign() const
|
|
{
|
|
if (!m_isValid || m_string.length() <= m_queryEnd)
|
|
return { };
|
|
|
|
return StringView(m_string).substring(m_queryEnd);
|
|
}
|
|
|
|
bool URL::isAboutBlank() const
|
|
{
|
|
return protocolIsAbout() && path() == "blank";
|
|
}
|
|
|
|
bool URL::isAboutSrcDoc() const
|
|
{
|
|
return protocolIsAbout() && path() == "srcdoc";
|
|
}
|
|
|
|
TextStream& operator<<(TextStream& ts, const URL& url)
|
|
{
|
|
ts << url.string();
|
|
return ts;
|
|
}
|
|
|
|
#if !PLATFORM(COCOA) && !USE(SOUP)
|
|
|
|
static bool isIPv4Address(StringView string)
|
|
{
|
|
auto count = 0;
|
|
|
|
for (const auto octet : string.splitAllowingEmptyEntries('.')) {
|
|
if (count >= 4)
|
|
return false;
|
|
|
|
const auto length = octet.length();
|
|
if (!length || length > 3)
|
|
return false;
|
|
|
|
auto value = 0;
|
|
for (auto i = 0u; i < length; ++i) {
|
|
const auto digit = octet[i];
|
|
|
|
// Prohibit leading zeroes.
|
|
if (digit > '9' || digit < (!i && length > 1 ? '1' : '0'))
|
|
return false;
|
|
|
|
value = 10 * value + (digit - '0');
|
|
}
|
|
|
|
if (value > 255)
|
|
return false;
|
|
|
|
count++;
|
|
}
|
|
|
|
return (count == 4);
|
|
}
|
|
|
|
static bool isIPv6Address(StringView string)
|
|
{
|
|
enum SkipState { None, WillSkip, Skipping, Skipped, Final };
|
|
auto skipState = None;
|
|
auto count = 0;
|
|
|
|
for (const auto hextet : string.splitAllowingEmptyEntries(':')) {
|
|
if (count >= 8 || skipState == Final)
|
|
return false;
|
|
|
|
const auto length = hextet.length();
|
|
if (!length) {
|
|
// :: may be used anywhere to skip 1 to 8 hextets, but only once.
|
|
if (skipState == Skipped)
|
|
return false;
|
|
|
|
if (skipState == None)
|
|
skipState = !count ? WillSkip : Skipping;
|
|
else if (skipState == WillSkip)
|
|
skipState = Skipping;
|
|
else
|
|
skipState = Final;
|
|
continue;
|
|
}
|
|
|
|
if (skipState == WillSkip)
|
|
return false;
|
|
|
|
if (skipState == Skipping)
|
|
skipState = Skipped;
|
|
|
|
if (length > 4) {
|
|
// An IPv4 address may be used in place of the final two hextets.
|
|
if ((skipState == None && count != 6) || (skipState == Skipped && count >= 6) || !isIPv4Address(hextet))
|
|
return false;
|
|
|
|
skipState = Final;
|
|
continue;
|
|
}
|
|
|
|
for (const auto codeUnit : hextet.codeUnits()) {
|
|
// IPv6 allows leading zeroes.
|
|
if (!isASCIIHexDigit(codeUnit))
|
|
return false;
|
|
}
|
|
|
|
count++;
|
|
}
|
|
|
|
return (count == 8 && skipState == None) || skipState == Skipped || skipState == Final;
|
|
}
|
|
|
|
bool URL::hostIsIPAddress(StringView host)
|
|
{
|
|
return host.contains(':') ? isIPv6Address(host) : isIPv4Address(host);
|
|
}
|
|
|
|
#endif
|
|
|
|
} // namespace WTF
|