gecko-dev/netwerk/base/nsURLHelper.cpp

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=4 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsURLHelper.h"

#include "mozilla/Encoding.h"
#include "mozilla/RangedPtr.h"
#include "mozilla/TextUtils.h"

#include <algorithm>
#include <iterator>

#include "nsASCIIMask.h"
#include "nsIFile.h"
#include "nsIURLParser.h"
#include "nsCOMPtr.h"
#include "nsCRT.h"
#include "nsNetCID.h"
#include "mozilla/Preferences.h"
#include "prnetdb.h"
#include "mozilla/StaticPrefs_network.h"
#include "mozilla/Tokenizer.h"
#include "nsEscape.h"
#include "nsDOMString.h"
#include "mozilla/net/rust_helper.h"
#include "mozilla/net/DNS.h"

using namespace mozilla;

//----------------------------------------------------------------------------
// Init/Shutdown
//----------------------------------------------------------------------------

// Set to true at the end of InitGlobals() and reset to false by
// net_ShutdownURLHelper().  The getters below use it to decide whether
// InitGlobals() still has to run.
static bool gInitialized = false;
// Cached URL parser services.  Each one is assigned by InitGlobals() only if
// the corresponding service lookup succeeded (so it may remain null), and all
// of them are cleared by net_ShutdownURLHelper().
static StaticRefPtr<nsIURLParser> gNoAuthURLParser;
static StaticRefPtr<nsIURLParser> gAuthURLParser;
static StaticRefPtr<nsIURLParser> gStdURLParser;

// Looks up the 'noauth', 'auth' and 'std' URL parser services and caches each
// one that was found in its file-level global.  A failed lookup asserts (in
// builds where NS_ASSERTION is active) and leaves that global untouched.
// gInitialized is set unconditionally, so the lookups are not retried.
static void InitGlobals() {
  {
    nsCOMPtr<nsIURLParser> parser =
        do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
    NS_ASSERTION(parser, "failed getting 'noauth' url parser");
    if (parser) {
      gNoAuthURLParser = parser;
    }
  }

  {
    nsCOMPtr<nsIURLParser> parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
    NS_ASSERTION(parser, "failed getting 'auth' url parser");
    if (parser) {
      gAuthURLParser = parser;
    }
  }

  {
    nsCOMPtr<nsIURLParser> parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
    NS_ASSERTION(parser, "failed getting 'std' url parser");
    if (parser) {
      gStdURLParser = parser;
    }
  }

  gInitialized = true;
}

// Releases the cached URL parsers and marks the cache as uninitialized so
// that the next getter call runs InitGlobals() again.
void net_ShutdownURLHelper() {
  // Clearing the flag is a no-op when it is already false.
  gInitialized = false;

  gNoAuthURLParser = nullptr;
  gAuthURLParser = nullptr;
  gStdURLParser = nullptr;
}

//----------------------------------------------------------------------------
// nsIURLParser getters
//----------------------------------------------------------------------------

// Returns the cached 'auth' URL parser, running InitGlobals() first if the
// cache has not been populated yet.  The result is null if the service
// lookup in InitGlobals() failed.
nsIURLParser* net_GetAuthURLParser() {
  if (!gInitialized) {
    InitGlobals();
  }
  return gAuthURLParser;
}

// Returns the cached 'noauth' URL parser, running InitGlobals() first if the
// cache has not been populated yet.  The result is null if the service
// lookup in InitGlobals() failed.
nsIURLParser* net_GetNoAuthURLParser() {
  if (!gInitialized) {
    InitGlobals();
  }
  return gNoAuthURLParser;
}

// Returns the cached 'std' URL parser, running InitGlobals() first if the
// cache has not been populated yet.  The result is null if the service
// lookup in InitGlobals() failed.
nsIURLParser* net_GetStdURLParser() {
  if (!gInitialized) {
    InitGlobals();
  }
  return gStdURLParser;
}

//---------------------------------------------------------------------------
// GetFileFromURLSpec implementations
//---------------------------------------------------------------------------
// Builds the URL spec for |aFile| via net_GetURLSpecFromActualFile() and
// makes sure it ends with '/', without checking whether the file really is a
// directory.  |result| is only written on success; on failure the error from
// net_GetURLSpecFromActualFile() is returned.
nsresult net_GetURLSpecFromDir(nsIFile* aFile, nsACString& result) {
  nsAutoCString spec;
  const nsresult rv = net_GetURLSpecFromActualFile(aFile, spec);
  if (NS_FAILED(rv)) {
    return rv;
  }

  const bool endsWithSlash = spec.Last() == '/';
  if (!endsWithSlash) {
    spec += '/';
  }

  result = spec;
  return NS_OK;
}

// Builds the URL spec for |aFile| via net_GetURLSpecFromActualFile().  When
// the file is known to be a directory the spec is given a trailing '/',
// because that affects relative URL resolution when the spec is later used
// as a base URL.  If IsDirectory() fails (e.g. the file does not exist) no
// assumption is made and the spec is left unchanged.
nsresult net_GetURLSpecFromFile(nsIFile* aFile, nsACString& result) {
  nsAutoCString spec;
  const nsresult rv = net_GetURLSpecFromActualFile(aFile, spec);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (spec.Last() != '/') {
    bool isDirectory = false;
    // A failure here is deliberately not propagated.
    const nsresult dirRv = aFile->IsDirectory(&isDirectory);
    if (NS_SUCCEEDED(dirRv) && isDirectory) {
      spec += '/';
    }
  }

  result = spec;
  return NS_OK;
}

//----------------------------------------------------------------------------
// file:// URL parsing
//----------------------------------------------------------------------------

// Splits a file: URL into directory, file base name and file extension.
//
// All three outputs are truncated first and only assigned when the matching
// component has a positive length.  Returns NS_ERROR_MALFORMED_URI when the
// input exceeds the network.standard-url max-length pref, NS_ERROR_UNEXPECTED
// when the scheme is not "file" or no parser is available, and otherwise
// whatever the scheme extraction or the parser reports.
nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,
                          nsACString& outFileBaseName,
                          nsACString& outFileExtension) {
  const uint32_t maxLength =
      (uint32_t)StaticPrefs::network_standard_url_max_length();
  if (inURL.Length() > maxLength) {
    return NS_ERROR_MALFORMED_URI;
  }

  outDirectory.Truncate();
  outFileBaseName.Truncate();
  outFileExtension.Truncate();

  const nsPromiseFlatCString& flatURL = PromiseFlatCString(inURL);
  const char* const spec = flatURL.get();

  nsAutoCString scheme;
  nsresult rv = net_ExtractURLScheme(flatURL, scheme);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (!scheme.EqualsLiteral("file")) {
    NS_ERROR("must be a file:// url");
    return NS_ERROR_UNEXPECTED;
  }

  nsIURLParser* parser = net_GetNoAuthURLParser();
  NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);

  // Step 1: locate the path inside the whole URL.  Scheme and authority are
  // of no interest here.
  uint32_t pathPos;
  int32_t pathLen;
  rv = parser->ParseURL(spec, flatURL.Length(), nullptr, nullptr, nullptr,
                        nullptr, &pathPos, &pathLen);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // Step 2: locate the file path inside the path.  Query and ref are of no
  // interest here.
  uint32_t filepathPos;
  int32_t filepathLen;
  rv = parser->ParsePath(spec + pathPos, pathLen, &filepathPos, &filepathLen,
                         nullptr, nullptr, nullptr, nullptr);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // ParsePath reports positions relative to the path; rebase onto the URL.
  const uint32_t filepathStart = filepathPos + pathPos;

  // Step 3: split the file path into directory, base name and extension.
  uint32_t directoryPos, basenamePos, extensionPos;
  int32_t directoryLen, basenameLen, extensionLen;
  rv = parser->ParseFilePath(spec + filepathStart, filepathLen, &directoryPos,
                             &directoryLen, &basenamePos, &basenameLen,
                             &extensionPos, &extensionLen);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (directoryLen > 0) {
    outDirectory =
        Substring(inURL, filepathStart + directoryPos, directoryLen);
  }
  if (basenameLen > 0) {
    outFileBaseName =
        Substring(inURL, filepathStart + basenamePos, basenameLen);
  }
  if (extensionLen > 0) {
    outFileExtension =
        Substring(inURL, filepathStart + extensionPos, extensionLen);
  }
  // A no-auth parser is used, so no host is ever reported.
  // XXX not strictly true... file://localhost/foo/bar.html is a valid URL

  return NS_OK;
}

//----------------------------------------------------------------------------
// path manipulation functions
//----------------------------------------------------------------------------

// Normalizes the dot segments of |path| in place.
//
// Only the part of |path| before the first '?' or '#' is normalized; the rest
// is copied through unchanged.  The work is done in two passes:
//   1. percent-encoded dot segments ("%2e", "%2e%2e", "%2e.", ".%2e", matched
//      case-insensitively) that follow a '/' and are followed by '/' or the
//      terminator are rewritten as "." / "..";
//   2. "/./" is collapsed to "/" and "/.." segments are resolved against the
//      preceding segment, followed by removal of a trailing ".".
//
// |flags| may contain NET_COALESCE_DOUBLE_SLASH_IS_ROOT (a leading "//" or
// "/%2F" is treated as a root marker) and NET_COALESCE_ALLOW_RELATIVE_ROOT
// (".." segments that would climb above the root are kept instead of
// dropped).
//
// Returns Nothing() if |path| does not start with '/'.  Otherwise returns
// Some(pair(lastSlash, endOfBasename)) for the rewritten path, where
// endOfBasename is the index of the first '?', '#' or the terminator, and
// lastSlash is the index found by scanning backwards from there for a '/'
// (the scan stops at index 0).
mozilla::Maybe<mozilla::CompactPair<uint32_t, uint32_t>> net_CoalesceDirs(
    netCoalesceFlags flags, char* path) {
  /* Stolen from the old netlib's mkparse.c.
   *
   * modifies a url of the form   /foo/../foo1  ->  /foo1
   *                       and    /foo/./foo1   ->  /foo/foo1
   *                       and    /foo/foo1/..  ->  /foo/
   */
  // fwdPtr reads the input, urlPtr writes the output; both walk the same
  // buffer and urlPtr never gets ahead of fwdPtr.
  char* fwdPtr = path;
  char* urlPtr = path;
  // Number of path segments seen so far that a ".." may still cancel out.
  uint32_t traversal = 0;
  // Length of a leading "//" (2) or "/%2F" (4) root marker, 0 if none.
  uint32_t special_ftp_len = 0;

  MOZ_ASSERT(*path == '/', "We expect the path to begin with /");
  if (*path != '/') {
    return Nothing();
  }

  /* Remember if this url is a special ftp one: */
  if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) {
    /* some schemes (for example ftp) have the speciality that
       the path can begin // or /%2F to mark the root of the
       servers filesystem, a simple / only marks the root relative
       to the user logging in. We remember the length of the marker */
    if (nsCRT::strncasecmp(path, "/%2F", 4) == 0) {
      special_ftp_len = 4;
    } else if (strncmp(path, "//", 2) == 0) {
      special_ftp_len = 2;
    }
  }

  // replace all %2E, %2e, %2e%2e, %2e%2E, %2E%2e, %2E%2E, etc with . or ..
  // respectively if between two "/"s or "/" and NULL terminator
  constexpr int PERCENT_2E_LENGTH = sizeof("%2e") - 1;
  constexpr uint32_t PERCENT_2E_WITH_PERIOD_LENGTH = PERCENT_2E_LENGTH + 1;

  // First pass: decode percent-encoded dot segments.  The output is never
  // longer than the input, so writing through urlPtr is safe.
  for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {
    // Assuming that we are currently at '/'
    if (*fwdPtr == '/' &&
        nsCRT::strncasecmp(fwdPtr + 1, "%2e", PERCENT_2E_LENGTH) == 0 &&
        (*(fwdPtr + PERCENT_2E_LENGTH + 1) == '\0' ||
         *(fwdPtr + PERCENT_2E_LENGTH + 1) == '/')) {
      *urlPtr++ = '/';
      *urlPtr++ = '.';
      fwdPtr += PERCENT_2E_LENGTH;
    }
    // If the remaining pathname is "%2e%2e" between "/"s, add ".."
    else if (*fwdPtr == '/' &&
             nsCRT::strncasecmp(fwdPtr + 1, "%2e%2e", PERCENT_2E_LENGTH * 2) ==
                 0 &&
             (*(fwdPtr + PERCENT_2E_LENGTH * 2 + 1) == '\0' ||
              *(fwdPtr + PERCENT_2E_LENGTH * 2 + 1) == '/')) {
      *urlPtr++ = '/';
      *urlPtr++ = '.';
      *urlPtr++ = '.';
      fwdPtr += PERCENT_2E_LENGTH * 2;
    }
    // If the remaining pathname is "%2e." or ".%2e" between "/"s, add ".."
    else if (*fwdPtr == '/' &&
             (nsCRT::strncasecmp(fwdPtr + 1, "%2e.",
                                 PERCENT_2E_WITH_PERIOD_LENGTH) == 0 ||
              nsCRT::strncasecmp(fwdPtr + 1, ".%2e",
                                 PERCENT_2E_WITH_PERIOD_LENGTH) == 0) &&
             (*(fwdPtr + PERCENT_2E_WITH_PERIOD_LENGTH + 1) == '\0' ||
              *(fwdPtr + PERCENT_2E_WITH_PERIOD_LENGTH + 1) == '/')) {
      *urlPtr++ = '/';
      *urlPtr++ = '.';
      *urlPtr++ = '.';
      fwdPtr += PERCENT_2E_WITH_PERIOD_LENGTH;
    } else {
      *urlPtr++ = *fwdPtr;
    }
  }
  // Copy remaining stuff past the #?;
  for (; *fwdPtr != '\0'; ++fwdPtr) {
    *urlPtr++ = *fwdPtr;
  }
  *urlPtr = '\0';  // terminate the url

  // start again, this time for real
  fwdPtr = path;
  urlPtr = path;

  // Second pass: collapse "/./" and resolve "/.." segments.
  for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {
    if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '/') {
      // remove . followed by slash
      ++fwdPtr;
    } else if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '.' &&
               (*(fwdPtr + 3) == '/' ||
                *(fwdPtr + 3) == '\0' ||  // This will take care of
                *(fwdPtr + 3) == '?' ||   // something like foo/bar/..#sometag
                *(fwdPtr + 3) == '#')) {
      // remove foo/..
      // reverse the urlPtr to the previous slash if possible
      // if url does not allow relative root then drop .. above root
      // otherwise retain them in the path
      if (traversal > 0 || !(flags & NET_COALESCE_ALLOW_RELATIVE_ROOT)) {
        if (urlPtr != path) urlPtr--;  // we must be going back at least by one
        for (; *urlPtr != '/' && urlPtr != path; urlPtr--) {
          ;  // null body
        }
        // NOTE(review): traversal is unsigned, so this wraps when it is 0.
        // That can only happen when NET_COALESCE_ALLOW_RELATIVE_ROOT is not
        // set, in which case the condition above is true regardless of the
        // value of traversal.
        --traversal;  // count back
        // forward the fwdPtr past the ../
        fwdPtr += 2;
        // if we have reached the beginning of the path
        // while searching for the previous / and we remember
        // that it is an url that begins with /%2F then
        // advance urlPtr again by 3 chars because /%2F already
        // marks the root of the path
        if (urlPtr == path && special_ftp_len > 3) {
          ++urlPtr;
          ++urlPtr;
          ++urlPtr;
        }
        // special case if we have reached the end
        // to preserve the last /
        if (*fwdPtr == '.' && *(fwdPtr + 1) == '\0') ++urlPtr;
      } else {
        // there are too many /.. in this path, just copy them instead.
        // forward the urlPtr past the /.. and copying it

        // However if we remember it is an url that starts with
        // /%2F and urlPtr just points at the "F" of "/%2F" then do
        // not overwrite it with the /, just copy .. and move forward
        // urlPtr.
        if (special_ftp_len > 3 && urlPtr == path + special_ftp_len - 1) {
          ++urlPtr;
        } else {
          *urlPtr++ = *fwdPtr;
        }
        ++fwdPtr;
        *urlPtr++ = *fwdPtr;
        ++fwdPtr;
        *urlPtr++ = *fwdPtr;
      }
    } else {
      // count the hierarchy, but only if we have not reached
      // the root of some special urls with a special root marker
      if (*fwdPtr == '/' && *(fwdPtr + 1) != '.' &&
          (special_ftp_len != 2 || *(fwdPtr + 1) != '/')) {
        traversal++;
      }
      // copy the url incrementally
      *urlPtr++ = *fwdPtr;
    }
  }

  /*
   *  Now lets remove trailing . case
   *     /foo/foo1/.   ->  /foo/foo1/
   */

  if ((urlPtr > (path + 1)) && (*(urlPtr - 1) == '.') &&
      (*(urlPtr - 2) == '/')) {
    urlPtr--;
  }

  // Before we start copying past ?#, we must make sure we don't overwrite
  // the first / character.  If fwdPtr is also unchanged, just copy everything
  // (this shouldn't happen unless we could get in here without a leading
  // slash).
  if (urlPtr == path && fwdPtr != path) {
    urlPtr++;
  }

  // Copy remaining stuff past the #?;
  for (; *fwdPtr != '\0'; ++fwdPtr) {
    *urlPtr++ = *fwdPtr;
  }
  *urlPtr = '\0';  // terminate the url

  // Both values below are indices into the rewritten |path|.
  uint32_t lastSlash = 0;
  uint32_t endOfBasename = 0;

  // find the last slash before # or ?
  // find the end of basename (i.e. hash, query, or end of string)
  for (; (*(path + endOfBasename) != '\0') &&
         (*(path + endOfBasename) != '?') && (*(path + endOfBasename) != '#');
       ++endOfBasename) {
  }

  // Now find the last slash starting from the end
  lastSlash = endOfBasename;
  // When the scan ended on the terminator, start one character earlier.
  if (lastSlash != 0 && *(path + lastSlash) == '\0') {
    --lastSlash;
  }
  // search the slash
  for (; lastSlash != 0 && *(path + lastSlash) != '/'; --lastSlash) {
  }

  return Some(mozilla::MakeCompactPair(lastSlash, endOfBasename));
}

//----------------------------------------------------------------------------
// scheme fu
//----------------------------------------------------------------------------

// Character predicate used by the scheme tokenizing code in this file.  The
// decision is delegated entirely to the Rust helper
// rust_net_is_valid_scheme_char; the accepted character set is defined there.
static bool net_IsValidSchemeChar(const char aChar) {
  return mozilla::net::rust_net_is_valid_scheme_char(aChar);
}

/* Extract the URI scheme, if there is one.
 *
 * Leading characters <= 0x20 are skipped.  The scheme must start with an
 * ASCII letter, continue with valid scheme characters (CR, LF and TAB are
 * tolerated in between) and be terminated by ':'.  On success |scheme|
 * receives the lower-cased scheme with CR/LF/TAB removed and without the
 * ':'.  Otherwise NS_ERROR_MALFORMED_URI is returned.
 */
nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme) {
  nsACString::const_iterator cursor, limit;
  inURI.BeginReading(cursor);
  inURI.EndReading(limit);

  // Skip leading C0 control characters and spaces.
  for (; cursor != limit && (uint8_t)*cursor <= 0x20; cursor++) {
  }

  Tokenizer tokenizer(Substring(cursor, limit), "\r\n\t");
  tokenizer.Record();

  // The first character has to be a letter.
  const bool startsWithAlpha = tokenizer.CheckChar(IsAsciiAlpha);
  if (!startsWithAlpha) {
    return NS_ERROR_MALFORMED_URI;
  }

  // Consume the remaining scheme characters, tolerating \r\n\t in between.
  for (;;) {
    if (tokenizer.CheckChar(net_IsValidSchemeChar)) {
      continue;
    }
    if (tokenizer.CheckWhite()) {
      continue;
    }
    break;
  }

  if (!tokenizer.CheckChar(':')) {
    return NS_ERROR_MALFORMED_URI;
  }

  tokenizer.Claim(scheme);
  scheme.StripTaggedASCII(ASCIIMask::MaskCRLFTab());
  ToLowerCase(scheme);
  return NS_OK;
}

// Reports whether |scheme| is a valid URL scheme.  The check is delegated
// entirely to the Rust helper rust_net_is_valid_scheme; the rules are defined
// there.
bool net_IsValidScheme(const nsACString& scheme) {
  return mozilla::net::rust_net_is_valid_scheme(&scheme);
}

bool net_IsAbsoluteURL(const nsACString& uri) {
  nsACString::const_iterator start, end;
  uri.BeginReading(start);
  uri.EndReading(end);

  // Strip C0 and space from begining
  while (start != end) {
    if ((uint8_t)*start > 0x20) {
      break;
    }
    start++;
  }

  Tokenizer p(Substring(start, end), "\r\n\t");

  // First char must be alpha
  if (!p.CheckChar(IsAsciiAlpha)) {
    return false;
  }

  while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
    // Skip valid scheme characters or \r\n\t
  }
  if (!p.CheckChar(':')) {
    return false;
  }
  p.SkipWhites();

  if (!p.CheckChar('/')) {
    return false;
  }
  p.SkipWhites();

  if (p.CheckChar('/')) {
    // aSpec is really absolute. Ignore aBaseURI in this case
    return true;
  }
  return false;
}

void net_FilterURIString(const nsACString& input, nsACString& result) {
  result.Truncate();

  const auto* start = input.BeginReading();
  const auto* end = input.EndReading();

  // Trim off leading and trailing invalid chars.
  auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
  const auto* newStart = std::find_if(start, end, charFilter);
  const auto* newEnd =
      std::find_if(std::reverse_iterator<decltype(end)>(end),
                   std::reverse_iterator<decltype(newStart)>(newStart),
                   charFilter)
          .base();

  // Check if chars need to be stripped.
  bool needsStrip = false;
  const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab();
  for (const auto* itr = start; itr != end; ++itr) {
    if (ASCIIMask::IsMasked(mask, *itr)) {
      needsStrip = true;
      break;
    }
  }

  // Just use the passed in string rather than creating new copies if no
  // changes are necessary.
  if (newStart == start && newEnd == end && !needsStrip) {
    result = input;
    return;
  }

  result.Assign(Substring(newStart, newEnd));
  if (needsStrip) {
    result.StripTaggedASCII(mask);
  }
}

// Trims leading and trailing characters <= 0x20 from |aInput| and hands the
// remainder to NS_EscapeAndFilterURL() together with |aFlags| and
// |aFilterMask|, writing to |aResult| (fallibly).  Returns that call's
// result.
nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,
                                const ASCIIMaskArray& aFilterMask,
                                nsACString& aResult) {
  aResult.Truncate();

  const char* const first = aInput.BeginReading();
  const char* const last = aInput.EndReading();

  // Locate the first and one-past-the-last character that survive trimming.
  const auto survivesTrim = [](char c) {
    return static_cast<uint8_t>(c) > 0x20;
  };
  using ReverseIter = std::reverse_iterator<const char*>;
  const char* const trimmedFirst = std::find_if(first, last, survivesTrim);
  const char* const trimmedLast =
      std::find_if(ReverseIter(last), ReverseIter(trimmedFirst), survivesTrim)
          .base();

  return NS_EscapeAndFilterURL(Substring(trimmedFirst, trimmedLast), aFlags,
                               &aFilterMask, aResult, fallible);
}

#if defined(XP_WIN)
bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf) {
  bool writing = false;

  nsACString::const_iterator beginIter, endIter;
  aURL.BeginReading(beginIter);
  aURL.EndReading(endIter);

  const char *s, *begin = beginIter.get();

  for (s = begin; s != endIter.get(); ++s) {
    if (*s == '\\') {
      writing = true;
      if (s > begin) aResultBuf.Append(begin, s - begin);
      aResultBuf += '/';
      begin = s + 1;
    }
    if (*s == '#') {
      // Don't normalize any backslashes following the hash.
      s = endIter.get();
      break;
    }
  }
  if (writing && s > begin) aResultBuf.Append(begin, s - begin);

  return writing;
}
#endif

//----------------------------------------------------------------------------
// miscellaneous (i.e., stuff that should really be elsewhere)
//----------------------------------------------------------------------------

// Lower-cases |c| in place if it is an ASCII upper-case letter; every other
// value is left alone.
static inline void ToLower(char& c) {
  const bool isUpper = c >= 'A' && c <= 'Z';
  if (isUpper) {
    c = static_cast<char>(c + ('a' - 'A'));
  }
}

// Lower-cases the ASCII letters in the first |length| characters of |str|.
void net_ToLowerCase(char* str, uint32_t length) {
  char* const limit = str + length;
  while (str < limit) {
    ToLower(*str);
    ++str;
  }
}

// Lower-cases the ASCII letters of the NUL-terminated string |str|.
void net_ToLowerCase(char* str) {
  while (*str) {
    ToLower(*str);
    ++str;
  }
}

// Scans [iter, stop) for the first character that occurs in the
// NUL-terminated |set|.  The scan also ends at an embedded NUL.  Returns a
// pointer to the match, or to the position where the scan ended (|stop| or
// the NUL) when there is none.
char* net_FindCharInSet(const char* iter, const char* stop, const char* set) {
  const char* cursor = iter;
  while (cursor != stop && *cursor != '\0') {
    for (const char* member = set; *member != '\0'; ++member) {
      if (*member == *cursor) {
        return (char*)cursor;
      }
    }
    ++cursor;
  }
  return (char*)cursor;
}

// Scans [iter, stop) for the first character that does NOT occur in the
// NUL-terminated |set|.  Returns a pointer to that character, or |stop| when
// every character of the range is in |set|.
//
// The bound is checked before each dereference, so an empty range
// (iter == stop) returns |stop| without reading *stop.  Previously an empty
// range whose first byte happened to be in |set| made the scan step past
// |stop| and keep reading outside the range.
char* net_FindCharNotInSet(const char* iter, const char* stop,
                           const char* set) {
  for (; iter != stop; ++iter) {
    bool inSet = false;
    for (const char* s = set; *s; ++s) {
      if (*iter == *s) {
        inSet = true;
        break;
      }
    }
    if (!inSet) {
      break;
    }
  }
  return (char*)iter;
}

// Scans the range [stop, iter) backwards, starting at iter - 1, for the last
// character that does NOT occur in the NUL-terminated |set|.  Returns a
// pointer to that character, or stop - 1 when the range is empty or every
// character of it is in |set|.
char* net_RFindCharNotInSet(const char* stop, const char* iter,
                            const char* set) {
  const char* cursor = iter - 1;
  const char* const floor = stop - 1;

  while (cursor != floor) {
    bool inSet = false;
    for (const char* member = set; *member; ++member) {
      if (*member == *cursor) {
        inSet = true;
        break;
      }
    }
    if (!inSet) {
      break;
    }
    --cursor;
  }
  return (char*)cursor;
}

#define HTTP_LWS " \t"

// Returns the index of the quote that closes the quoted string opening at
// |stringStart| (where |stringDelim| must be found, either '"' or '\'').
// A backslash escapes the character that follows it.  If the string is never
// closed, flatStr.Length() is returned.
static uint32_t net_FindStringEnd(const nsCString& flatStr,
                                  uint32_t stringStart, char stringDelim) {
  NS_ASSERTION(stringStart < flatStr.Length() &&
                   flatStr.CharAt(stringStart) == stringDelim &&
                   (stringDelim == '"' || stringDelim == '\''),
               "Invalid stringStart");

  const char set[] = {stringDelim, '\\', '\0'};
  const uint32_t length = flatStr.Length();

  for (;;) {
    // |stringStart| is either the opening quote or the most recently escaped
    // character, so the search resumes right after it.
    const uint32_t hit = flatStr.FindCharInSet(set, stringStart + 1);
    if (hit == uint32_t(kNotFound)) {
      return length;
    }

    if (flatStr.CharAt(hit) != '\\') {
      // Found the closing quote.
      return hit;
    }

    // Backslash: the character after it is escaped and must be skipped.
    stringStart = hit + 1;
    if (stringStart == length) {
      return stringStart;
    }
  }

  MOZ_ASSERT_UNREACHABLE("How did we get here?");
  return flatStr.Length();
}

// Returns the index of the first |delimiter| at or after |searchStart| that
// is not inside a double-quoted string, or flatStr.Length() if there is none
// (including the case of a quoted string that is never closed).
static uint32_t net_FindMediaDelimiter(const nsCString& flatStr,
                                       uint32_t searchStart, char delimiter) {
  const char stopChars[] = {delimiter, '"', '\0'};

  for (;;) {
    const uint32_t hit = flatStr.FindCharInSet(stopChars, searchStart);
    if (hit == uint32_t(kNotFound)) {
      return flatStr.Length();
    }

    const char found = flatStr.CharAt(hit);
    if (found == delimiter) {
      return hit;
    }

    // Otherwise this is the opening quote of a string: jump to its end.
    const uint32_t closingQuote = net_FindStringEnd(flatStr, hit, found);
    if (closingQuote == flatStr.Length()) {
      return closingQuote;
    }

    // Resume the search right after the closing quote.
    searchStart = closingQuote + 1;
  }

  MOZ_ASSERT_UNREACHABLE("How did we get here?");
  return flatStr.Length();
}

// Parses a single media-type (type/subtype plus optional ';'-separated
// parameters) and updates the caller's content type / charset state.
//
//   aMediaTypeStr   the media-type text to parse.
//   aContentType    in/out: replaced (lower-cased) when the parsed type is
//                   accepted and differs, case-insensitively, from its
//                   current value.
//   aContentCharset in/out: replaced when the parsed type carries a charset
//                   parameter, or when the type changed and *aHadCharset was
//                   already true.
//   aOffset         added to the charset positions written to
//                   aCharsetStart / aCharsetEnd.
//   aHadCharset     in/out flag, set to true whenever aContentCharset is
//                   written.
//   aCharsetStart,
//   aCharsetEnd     out: position of the charset parameter (starting at the
//                   character just before the parameter text, normally its
//                   ';'), or the spot where one would go.  Only written in
//                   the cases handled at the end of this function.
//   aStrict         when true, only LWS may follow the last parsed parameter;
//                   when false, a type of "*/*" is rejected instead.
//
// A type without a '/' is always rejected, in which case no output is
// touched.
static void net_ParseMediaType(const nsACString& aMediaTypeStr,
                               nsACString& aContentType,
                               nsACString& aContentCharset, int32_t aOffset,
                               bool* aHadCharset, int32_t* aCharsetStart,
                               int32_t* aCharsetEnd, bool aStrict) {
  const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
  const char* start = flatStr.get();
  const char* end = start + flatStr.Length();

  // Trim LWS leading and trailing whitespace from type.
  const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
  const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";");

  const char* charset = "";
  const char* charsetEnd = charset;
  int32_t charsetParamStart = 0;
  int32_t charsetParamEnd = 0;

  // Tracks how much of the input the parser has accounted for; only used by
  // the aStrict trailing-garbage check below.
  uint32_t consumed = typeEnd - type;

  // Iterate over parameters
  bool typeHasCharset = false;
  uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
  if (paramStart != uint32_t(kNotFound)) {
    // We have parameters.  Iterate over them.
    uint32_t curParamStart = paramStart + 1;
    do {
      uint32_t curParamEnd =
          net_FindMediaDelimiter(flatStr, curParamStart, ';');

      const char* paramName = net_FindCharNotInSet(
          start + curParamStart, start + curParamEnd, HTTP_LWS);
      static const char charsetStr[] = "charset=";
      // If several charset parameters are present, the last one wins because
      // each match overwrites the previous one.
      if (nsCRT::strncasecmp(paramName, charsetStr, sizeof(charsetStr) - 1) ==
          0) {
        charset = paramName + sizeof(charsetStr) - 1;
        charsetEnd = start + curParamEnd;
        typeHasCharset = true;
        charsetParamStart = curParamStart - 1;
        charsetParamEnd = curParamEnd;
      }

      consumed = curParamEnd;
      curParamStart = curParamEnd + 1;
    } while (curParamStart < flatStr.Length());
  }

  bool charsetNeedsQuotedStringUnescaping = false;
  if (typeHasCharset) {
    // Trim LWS leading and trailing whitespace from charset.
    charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
    if (*charset == '"') {
      // Quoted value: it ends at the matching quote, and the opening quote
      // itself is skipped.
      charsetNeedsQuotedStringUnescaping = true;
      charsetEnd =
          start + net_FindStringEnd(flatStr, charset - start, *charset);
      charset++;
      NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
    } else {
      charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";");
    }
  }

  // if the server sent "*/*", it is meaningless, so do not store it.
  // also, if type is the same as aContentType, then just update the
  // charset.  however, if charset is empty and aContentType hasn't
  // changed, then don't wipe-out an existing aContentCharset.  We
  // also want to reject a mime-type if it does not include a slash.
  // some servers give junk after the charset parameter, which may
  // include a comma, so this check makes us a bit more tolerant.

  if (type != typeEnd && memchr(type, '/', typeEnd - type) != nullptr &&
      (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end)
               : (strncmp(type, "*/*", typeEnd - type) != 0))) {
    // Common case here is that aContentType is empty
    bool eq = !aContentType.IsEmpty() &&
              aContentType.Equals(Substring(type, typeEnd),
                                  nsCaseInsensitiveCStringComparator);
    if (!eq) {
      aContentType.Assign(type, typeEnd - type);
      ToLowerCase(aContentType);
    }

    if ((!eq && *aHadCharset) || typeHasCharset) {
      *aHadCharset = true;
      if (charsetNeedsQuotedStringUnescaping) {
        // parameters using the "quoted-string" syntax need
        // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
        aContentCharset.Truncate();
        for (const char* c = charset; c != charsetEnd; c++) {
          if (*c == '\\' && c + 1 != charsetEnd) {
            // eat escape
            c++;
          }
          aContentCharset.Append(*c);
        }
      } else {
        aContentCharset.Assign(charset, charsetEnd - charset);
      }
      if (typeHasCharset) {
        *aCharsetStart = charsetParamStart + aOffset;
        *aCharsetEnd = charsetParamEnd + aOffset;
      }
    }
    // Only set a new charset position if this is a different type
    // from the last one we had and it doesn't already have a
    // charset param.  If this is the same type, we probably want
    // to leave the charset position on its first occurrence.
    if (!eq && !typeHasCharset) {
      // Empty range located at the first ';' (or at the end of the string
      // when there are no parameters).
      int32_t charsetStart = int32_t(paramStart);
      if (charsetStart == kNotFound) charsetStart = flatStr.Length();

      *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
    }
  }
}

#undef HTTP_LWS

// Convenience overload for callers that do not need the position of the
// charset parameter: forwards to the six-argument overload and discards the
// reported positions.
void net_ParseContentType(const nsACString& aHeaderStr,
                          nsACString& aContentType, nsACString& aContentCharset,
                          bool* aHadCharset) {
  int32_t ignoredCharsetStart;
  int32_t ignoredCharsetEnd;
  net_ParseContentType(aHeaderStr, aContentType, aContentCharset, aHadCharset,
                       &ignoredCharsetStart, &ignoredCharsetEnd);
}

void net_ParseContentType(const nsACString& aHeaderStr,
                          nsACString& aContentType, nsACString& aContentCharset,
                          bool* aHadCharset, int32_t* aCharsetStart,
                          int32_t* aCharsetEnd) {
  //
  // Augmented BNF (from RFC 2616 section 3.7):
  //
  //   header-value = media-type *( LWS "," LWS media-type )
  //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
  //   type         = token
  //   subtype      = token
  //   parameter    = attribute "=" value
  //   attribute    = token
  //   value        = token | quoted-string
  //
  //
  // Examples:
  //
  //   text/html
  //   text/html, text/html
  //   text/html,text/html; charset=ISO-8859-1
  //   text/html,text/html; charset="ISO-8859-1"
  //   text/html;charset=ISO-8859-1, text/html
  //   text/html;charset='ISO-8859-1', text/html
  //   application/octet-stream
  //

  *aHadCharset = false;
  const nsCString& flatStr = PromiseFlatCString(aHeaderStr);

  // iterate over media-types.  Note that ',' characters can happen
  // inside quoted strings, so we need to watch out for that.
  uint32_t curTypeStart = 0;
  do {
    // curTypeStart points to the start of the current media-type.  We want
    // to look for its end.
    uint32_t curTypeEnd = net_FindMediaDelimiter(flatStr, curTypeStart, ',');

    // At this point curTypeEnd points to the spot where the media-type
    // starting at curTypeEnd ends.  Time to parse that!
    net_ParseMediaType(
        Substring(flatStr, curTypeStart, curTypeEnd - curTypeStart),
        aContentType, aContentCharset, curTypeStart, aHadCharset, aCharsetStart,
        aCharsetEnd, false);

    // And let's move on to the next media-type
    curTypeStart = curTypeEnd + 1;
  } while (curTypeStart < flatStr.Length());
}

void net_ParseRequestContentType(const nsACString& aHeaderStr,
                                 nsACString& aContentType,
                                 nsACString& aContentCharset,
                                 bool* aHadCharset) {
  //
  // Augmented BNF (from RFC 7231 section 3.1.1.1):
  //
  //   media-type   = type "/" subtype *( OWS ";" OWS parameter )
  //   type         = token
  //   subtype      = token
  //   parameter    = token "=" ( token / quoted-string )
  //
  // Examples:
  //
  //   text/html
  //   text/html; charset=ISO-8859-1
  //   text/html; charset="ISO-8859-1"
  //   application/octet-stream
  //

  aContentType.Truncate();
  aContentCharset.Truncate();
  *aHadCharset = false;
  const nsCString& flatStr = PromiseFlatCString(aHeaderStr);

  // At this point curTypeEnd points to the spot where the media-type
  // starting at curTypeEnd ends.  Time to parse that!
  nsAutoCString contentType, contentCharset;
  bool hadCharset = false;
  int32_t dummy1, dummy2;
  uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
  if (typeEnd != flatStr.Length()) {
    // We have some stuff left at the end, so this is not a valid
    // request Content-Type header.
    return;
  }
  net_ParseMediaType(flatStr, contentType, contentCharset, 0, &hadCharset,
                     &dummy1, &dummy2, true);

  aContentType = contentType;
  aContentCharset = contentCharset;
  *aHadCharset = hadCharset;
}

// Decides whether |host| may be handed to the lower-level DNS logic.
//
// A host longer than 253 characters is rejected.  Otherwise it is accepted
// if it consists solely of allow-listed characters or, failing that, if
// HostIsIPLiteral() recognizes it (e.g. an IPv6 link-local address containing
// a percent sign).
bool net_IsValidDNSHost(const nsACString& host) {
  // Allow-list for DNS names (RFC 1035) plus "$+_", which were added for
  // pragmatic reasons, see
  // https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
  // An explicit allow-list is more self-documenting than a deny-list, and
  // the most common characters come first.
  static const char kAllowedHostChars[] =
      "abcdefghijklmnopqrstuvwxyz"
      ".-0123456789"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_";

  // The host name is limited to 253 ascii characters.
  if (host.Length() > 253) {
    return false;
  }

  const char* const hostEnd = host.EndReading();
  const char* const firstRejected =
      net_FindCharNotInSet(host.BeginReading(), hostEnd, kAllowedHostChars);
  if (firstRejected == hostEnd) {
    return true;
  }

  return mozilla::net::HostIsIPLiteral(host);
}

// Reports whether |aAddr| is a valid IPv4 address.  The check is delegated
// entirely to the Rust helper rust_net_is_valid_ipv4_addr; the accepted
// syntax is defined there.
bool net_IsValidIPv4Addr(const nsACString& aAddr) {
  return mozilla::net::rust_net_is_valid_ipv4_addr(&aAddr);
}

// Reports whether |aAddr| is a valid IPv6 address.  The check is delegated
// entirely to the Rust helper rust_net_is_valid_ipv6_addr; the accepted
// syntax is defined there.
bool net_IsValidIPv6Addr(const nsACString& aAddr) {
  return mozilla::net::rust_net_is_valid_ipv6_addr(&aAddr);
}

bool net_GetDefaultStatusTextForCode(uint16_t aCode, nsACString& aOutText) {
  switch (aCode) {
      // start with the most common
    case 200:
      aOutText.AssignLiteral("OK");
      break;
    case 404:
      aOutText.AssignLiteral("Not Found");
      break;
    case 301:
      aOutText.AssignLiteral("Moved Permanently");
      break;
    case 304:
      aOutText.AssignLiteral("Not Modified");
      break;
    case 307:
      aOutText.AssignLiteral("Temporary Redirect");
      break;
    case 500:
      aOutText.AssignLiteral("Internal Server Error");
      break;

      // also well known
    case 100:
      aOutText.AssignLiteral("Continue");
      break;
    case 101:
      aOutText.AssignLiteral("Switching Protocols");
      break;
    case 201:
      aOutText.AssignLiteral("Created");
      break;
    case 202:
      aOutText.AssignLiteral("Accepted");
      break;
    case 203:
      aOutText.AssignLiteral("Non Authoritative");
      break;
    case 204:
      aOutText.AssignLiteral("No Content");
      break;
    case 205:
      aOutText.AssignLiteral("Reset Content");
      break;
    case 206:
      aOutText.AssignLiteral("Partial Content");
      break;
    case 207:
      aOutText.AssignLiteral("Multi-Status");
      break;
    case 208:
      aOutText.AssignLiteral("Already Reported");
      break;
    case 300:
      aOutText.AssignLiteral("Multiple Choices");
      break;
    case 302:
      aOutText.AssignLiteral("Found");
      break;
    case 303:
      aOutText.AssignLiteral("See Other");
      break;
    case 305:
      aOutText.AssignLiteral("Use Proxy");
      break;
    case 308:
      aOutText.AssignLiteral("Permanent Redirect");
      break;
    case 400:
      aOutText.AssignLiteral("Bad Request");
      break;
    case 401:
      aOutText.AssignLiteral("Unauthorized");
      break;
    case 402:
      aOutText.AssignLiteral("Payment Required");
      break;
    case 403:
      aOutText.AssignLiteral("Forbidden");
      break;
    case 405:
      aOutText.AssignLiteral("Method Not Allowed");
      break;
    case 406:
      aOutText.AssignLiteral("Not Acceptable");
      break;
    case 407:
      aOutText.AssignLiteral("Proxy Authentication Required");
      break;
    case 408:
      aOutText.AssignLiteral("Request Timeout");
      break;
    case 409:
      aOutText.AssignLiteral("Conflict");
      break;
    case 410:
      aOutText.AssignLiteral("Gone");
      break;
    case 411:
      aOutText.AssignLiteral("Length Required");
      break;
    case 412:
      aOutText.AssignLiteral("Precondition Failed");
      break;
    case 413:
      aOutText.AssignLiteral("Request Entity Too Large");
      break;
    case 414:
      aOutText.AssignLiteral("Request URI Too Long");
      break;
    case 415:
      aOutText.AssignLiteral("Unsupported Media Type");
      break;
    case 416:
      aOutText.AssignLiteral("Requested Range Not Satisfiable");
      break;
    case 417:
      aOutText.AssignLiteral("Expectation Failed");
      break;
    case 418:
      aOutText.AssignLiteral("I'm a teapot");
      break;
    case 421:
      aOutText.AssignLiteral("Misdirected Request");
      break;
    case 422:
      aOutText.AssignLiteral("Unprocessable Entity");
      break;
    case 423:
      aOutText.AssignLiteral("Locked");
      break;
    case 424:
      aOutText.AssignLiteral("Failed Dependency");
      break;
    case 425:
      aOutText.AssignLiteral("Too Early");
      break;
    case 426:
      aOutText.AssignLiteral("Upgrade Required");
      break;
    case 428:
      aOutText.AssignLiteral("Precondition Required");
      break;
    case 429:
      aOutText.AssignLiteral("Too Many Requests");
      break;
    case 431:
      aOutText.AssignLiteral("Request Header Fields Too Large");
      break;
    case 451:
      aOutText.AssignLiteral("Unavailable For Legal Reasons");
      break;
    case 501:
      aOutText.AssignLiteral("Not Implemented");
      break;
    case 502:
      aOutText.AssignLiteral("Bad Gateway");
      break;
    case 503:
      aOutText.AssignLiteral("Service Unavailable");
      break;
    case 504:
      aOutText.AssignLiteral("Gateway Timeout");
      break;
    case 505:
      aOutText.AssignLiteral("HTTP Version Unsupported");
      break;
    case 506:
      aOutText.AssignLiteral("Variant Also Negotiates");
      break;
    case 507:
      aOutText.AssignLiteral("Insufficient Storage ");
      break;
    case 508:
      aOutText.AssignLiteral("Loop Detected");
      break;
    case 510:
      aOutText.AssignLiteral("Not Extended");
      break;
    case 511:
      aOutText.AssignLiteral("Network Authentication Required");
      break;
    default:
      aOutText.AssignLiteral("No Reason Phrase");
      return false;
  }
  return true;
}

// Builds a predicate that is true for any element whose mKey equals |aName|.
// The predicate holds |aName| by reference, so it must not outlive the string
// it was created from.
static auto MakeNameMatcher(const nsACString& aName) {
  auto keyMatches = [&aName](const auto& aCandidate) {
    return aCandidate.mKey.Equals(aName);
  };
  return keyMatches;
}

static void AssignMaybeInvalidUTF8String(const nsACString& aSource,
                                         nsACString& aDest) {
  if (NS_FAILED(UTF_8_ENCODING->DecodeWithoutBOMHandling(aSource, aDest))) {
    MOZ_CRASH("Out of memory when converting URL params.");
  }
}

namespace mozilla {

bool URLParams::Has(const nsACString& aName) {
  return std::any_of(mParams.cbegin(), mParams.cend(), MakeNameMatcher(aName));
}

bool URLParams::Has(const nsACString& aName, const nsACString& aValue) {
  return std::any_of(
      mParams.cbegin(), mParams.cend(), [&aName, &aValue](const auto& param) {
        return param.mKey.Equals(aName) && param.mValue.Equals(aValue);
      });
}

void URLParams::Get(const nsACString& aName, nsACString& aRetval) {
  aRetval.SetIsVoid(true);

  const auto end = mParams.cend();
  const auto it = std::find_if(mParams.cbegin(), end, MakeNameMatcher(aName));
  if (it != end) {
    aRetval.Assign(it->mValue);
  }
}

void URLParams::GetAll(const nsACString& aName, nsTArray<nsCString>& aRetval) {
  aRetval.Clear();

  for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {
    if (mParams[i].mKey.Equals(aName)) {
      aRetval.AppendElement(mParams[i].mValue);
    }
  }
}

void URLParams::Append(const nsACString& aName, const nsACString& aValue) {
  Param* param = mParams.AppendElement();
  param->mKey = aName;
  param->mValue = aValue;
}

void URLParams::Set(const nsACString& aName, const nsACString& aValue) {
  Param* param = nullptr;
  for (uint32_t i = 0, len = mParams.Length(); i < len;) {
    if (!mParams[i].mKey.Equals(aName)) {
      ++i;
      continue;
    }
    if (!param) {
      param = &mParams[i];
      ++i;
      continue;
    }
    // Remove duplicates.
    mParams.RemoveElementAt(i);
    --len;
  }

  if (!param) {
    param = mParams.AppendElement();
    param->mKey = aName;
  }

  param->mValue = aValue;
}

void URLParams::Delete(const nsACString& aName) {
  mParams.RemoveElementsBy(
      [&aName](const auto& param) { return param.mKey.Equals(aName); });
}

// Removes every stored parameter whose key equals |aName| and whose value
// equals |aValue|.
void URLParams::Delete(const nsACString& aName, const nsACString& aValue) {
  auto isSamePair = [&aName, &aValue](const auto& aEntry) {
    if (!aEntry.mKey.Equals(aName)) {
      return false;
    }
    return aEntry.mValue.Equals(aValue);
  };
  mParams.RemoveElementsBy(isSamePair);
}

// Form-urlencoded decoding of |aInput|, appended to |aOutput|:
//   - '+' becomes a space (U+0020);
//   - "%XY", with X and Y ASCII hex digits, becomes the byte 0xXY;
//   - a '%' not followed by two hex digits is copied through unchanged;
//   - every other byte is copied through unchanged.
// Afterwards the whole of |aOutput| is run through
// AssignMaybeInvalidUTF8String(), i.e. re-decoded as UTF-8 in place.
//
// Note that decoded bytes are appended: |aOutput| is not truncated first.
/* static */
void URLParams::DecodeString(const nsACString& aInput, nsACString& aOutput) {
  const auto isAsciiHexDigit = [](char aChar) {
    return (aChar >= '0' && aChar <= '9') || (aChar >= 'A' && aChar <= 'F') ||
           (aChar >= 'a' && aChar <= 'f');
  };

  // Only meaningful for characters accepted by isAsciiHexDigit.
  const auto hexDigitValue = [](char aChar) -> int {
    if (aChar >= '0' && aChar <= '9') {
      return aChar - '0';
    }
    if (aChar >= 'A' && aChar <= 'F') {
      return aChar - 'A' + 10;
    }
    return aChar - 'a' + 10;
  };

  const char* cursor = aInput.BeginReading();
  const char* const inputEnd = aInput.EndReading();

  while (cursor != inputEnd) {
    const char current = *cursor;

    // replace '+' with U+0020
    if (current == '+') {
      aOutput.Append(' ');
      ++cursor;
      continue;
    }

    if (current != '%') {
      aOutput.Append(current);
      ++cursor;
      continue;
    }

    // Percent decode algorithm. Check the remaining length rather than
    // computing cursor + 2 up front: when '%' is the last byte, that pointer
    // would lie beyond one-past-the-end of the buffer.
    if (inputEnd - cursor >= 3 && isAsciiHexDigit(cursor[1]) &&
        isAsciiHexDigit(cursor[2])) {
      const int decoded =
          hexDigitValue(cursor[1]) * 16 + hexDigitValue(cursor[2]);
      aOutput.Append(static_cast<char>(decoded));
      cursor += 3;
    } else {
      // Not a valid escape sequence; keep the '%' literally.
      aOutput.Append('%');
      ++cursor;
    }
  }

  AssignMaybeInvalidUTF8String(aOutput, aOutput);
}

// Consumes the next '&'-delimited "name=value" segment from [aStart, aEnd).
//
// |aStart| is always advanced: past the '&' when one is found, otherwise to
// |aEnd|. Returns false (leaving the outputs untouched) when the segment is
// empty. Otherwise the segment is split at its first '='; without an '=' the
// whole segment is the name and the value is empty. With |aShouldDecode| the
// two halves go through DecodeString(), otherwise through
// AssignMaybeInvalidUTF8String(), and true is returned.
//
// |aOutputName| and |aOutputValue| are dereferenced unconditionally once a
// non-empty segment is found.
/* static */
bool URLParams::ParseNextInternal(const char*& aStart, const char* const aEnd,
                                  bool aShouldDecode, nsACString* aOutputName,
                                  nsACString* aOutputValue) {
  const char* const segmentBegin = aStart;
  const char* const segmentEnd = std::find(segmentBegin, aEnd, '&');

  // Step over the separator, if there was one.
  aStart = (segmentEnd == aEnd) ? aEnd : segmentEnd + 1;

  if (segmentBegin == segmentEnd) {
    return false;
  }

  // When no '=' is present, |equalsPos| is |segmentEnd| and the name spans
  // the whole segment.
  const char* const equalsPos = std::find(segmentBegin, segmentEnd, '=');

  nsDependentCSubstring name;
  name.Rebind(segmentBegin, equalsPos);

  nsDependentCSubstring value;
  if (equalsPos != segmentEnd) {
    value.Rebind(equalsPos + 1, segmentEnd);
  }

  if (!aShouldDecode) {
    AssignMaybeInvalidUTF8String(name, *aOutputName);
    AssignMaybeInvalidUTF8String(value, *aOutputValue);
  } else {
    DecodeString(name, *aOutputName);
    DecodeString(value, *aOutputValue);
  }
  return true;
}

// Looks up |aName| in the (decoded) query string |aInput| and moves the value
// of the first matching pair into |aValue|. |aValue| is marked void before
// parsing starts.
//
// The result is the negation of what URLParams::Parse() returns; the handler
// asks Parse() to stop (by returning false) as soon as a match is stored.
// NOTE(review): presumably Parse() returns false exactly when the handler
// stopped it, which makes this "true iff |aName| was found" — confirm against
// the Parse() definition in the header.
/* static */
bool URLParams::Extract(const nsACString& aInput, const nsACString& aName,
                        nsACString& aValue) {
  aValue.SetIsVoid(true);

  auto captureFirstMatch = [&aName, &aValue](const nsACString& name,
                                             nsCString&& value) {
    if (!(aName == name)) {
      // Not the one we want; keep parsing.
      return true;
    }
    aValue = std::move(value);
    return false;
  };

  const bool parseResult = URLParams::Parse(aInput, true, captureFirstMatch);
  return !parseResult;
}

// Replaces the stored parameters with the pairs parsed (and decoded) from
// |aInput|.
void URLParams::ParseInput(const nsACString& aInput) {
  // Start from a clean slate: nothing from a previous parse may survive.
  DeleteAll();

  auto storePair = [this](nsCString&& aKey, nsCString&& aVal) {
    mParams.AppendElement(Param{std::move(aKey), std::move(aVal)});
    // Never stop early; every pair is wanted.
    return true;
  };
  URLParams::Parse(aInput, true, storePair);
}

// Appends the form-urlencoded serialization of |aInput| to |aValue|:
//   - a space becomes '+';
//   - '*', '-', '.', '_', ASCII digits and ASCII letters are copied as-is;
//   - every other byte becomes "%XX" with two uppercase hex digits.
void URLParams::SerializeString(const nsACString& aInput, nsACString& aValue) {
  // Bytes that pass through without escaping.
  const auto passesThrough = [](unsigned char aByte) {
    return aByte == '*' || aByte == '-' || aByte == '.' || aByte == '_' ||
           (aByte >= '0' && aByte <= '9') || (aByte >= 'A' && aByte <= 'Z') ||
           (aByte >= 'a' && aByte <= 'z');
  };

  // Work on unsigned bytes so values >= 0x80 print as two hex digits.
  const auto* cursor =
      reinterpret_cast<const unsigned char*>(aInput.BeginReading());
  const unsigned char* const stop = cursor + aInput.Length();

  for (; cursor != stop; ++cursor) {
    const unsigned char byte = *cursor;

    if (byte == ' ') {
      aValue.Append('+');
      continue;
    }

    if (passesThrough(byte)) {
      aValue.Append(static_cast<char>(byte));
      continue;
    }

    // Percent Encode algorithm
    aValue.AppendPrintf("%%%.2X", byte);
  }
}

// Writes all stored parameters into |aValue| as "key=value" pairs joined by
// '&', replacing whatever |aValue| held before. With |aEncode| set, keys and
// values go through SerializeString(); otherwise they are copied verbatim.
void URLParams::Serialize(nsACString& aValue, bool aEncode) const {
  aValue.Truncate();

  bool needSeparator = false;
  for (const Param& entry : mParams) {
    // Every pair except the first is preceded by '&'.
    if (needSeparator) {
      aValue.Append('&');
    }
    needSeparator = true;

    if (!aEncode) {
      aValue.Append(entry.mKey);
      aValue.Append('=');
      aValue.Append(entry.mValue);
      continue;
    }

    SerializeString(entry.mKey, aValue);
    aValue.Append('=');
    SerializeString(entry.mValue, aValue);
  }
}

// Stable-sorts the stored parameters by key. Keys are compared after
// conversion from UTF-8 to UTF-16.
void URLParams::Sort() {
  mParams.StableSort([](const Param& lhs, const Param& rhs) {
    // FIXME(emilio, bug 1888901): URLSearchParams.sort() is specified in terms
    // of utf-16 code points, hence the conversion of both keys on every
    // comparison. That's unfortunate; maybe the string conversions can be
    // optimized?
    const NS_ConvertUTF8toUTF16 lhsKey(lhs.mKey);
    const NS_ConvertUTF8toUTF16 rhsKey(rhs.mKey);
    return Compare(lhsKey, rhsKey);
  });
}

}  // namespace mozilla