#include "Common/Log.h" #include "Common/StringUtils.h" #include "Common/Net/URL.h" const char *UrlEncoder::unreservedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~"; const char *UrlEncoder::hexChars = "0123456789ABCDEF"; int MultipartFormDataEncoder::seq = 0; void Url::Split() { size_t colonSlashSlash = url_.find("://"); if (colonSlashSlash == std::string::npos) { ERROR_LOG(IO, "Invalid URL: %s", url_.c_str()); return; } protocol_ = url_.substr(0, colonSlashSlash); size_t sep = url_.find('/', colonSlashSlash + 3); if (sep == std::string::npos) { valid_ = false; return; } host_ = url_.substr(colonSlashSlash + 3, sep - colonSlashSlash - 3); resource_ = url_.substr(sep); // include the slash! size_t portsep = host_.rfind(':'); if (portsep != host_.npos) { port_ = atoi(host_.substr(portsep + 1).c_str()); host_ = host_.substr(0, portsep); } else { port_ = protocol_ == "https" ? 443 : 80; } valid_ = protocol_.size() > 1 && host_.size() > 1; } Url Url::Relative(const std::string &next) const { if (next.size() > 2 && next[0] == '/' && next[1] == '/') { // This means use the same protocol, but the rest is new. return Url(protocol_ + ":" + next); } // Or it could just be a fully absolute URL. size_t colonSlashSlash = next.find("://"); if (colonSlashSlash != std::string::npos) { return Url(next); } // Anything else should be a new resource, but it might be directory relative. Url resolved = *this; if (next.size() > 1 && next[0] == '/') { // Easy, just replace the resource. resolved.resource_ = next; } else { size_t last_slash = resource_.find_last_of('/'); resolved.resource_ = resource_.substr(0, last_slash + 1) + next; } resolved.url_ = resolved.ToString(); return resolved; } std::string Url::ToString() const { if (!valid_) { return "about:invalid-url"; } std::string serialized = protocol_ + "://" + host_; bool needsPort = true; if (protocol_ == "https") { needsPort = port_ != 443; } else if (protocol_ == "http") { needsPort = port_ != 80; } if (needsPort) { serialized += ":" + StringFromInt(port_); } return serialized + resource_; } // UriDecode and UriEncode are from http://www.codeguru.com/cpp/cpp/string/conversions/print.php/c12759 // by jinq0123 (November 2, 2006) // Uri encode and decode. // RFC1630, RFC1738, RFC2396 // Some compilers don't like to assume (int)-1 will safely cast to (char)-1 as // the MSBs aren't 0's. Workaround the issue while maintaining table spacing. #define N1 (char)-1 const char HEX2DEC[256] = { /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ /* 0 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 1 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 2 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,N1,N1, N1,N1,N1,N1, /* 4 */ N1,10,11,12, 13,14,15,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 5 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 6 */ N1,10,11,12, 13,14,15,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 7 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 8 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* 9 */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* A */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* B */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* C */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* D */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* E */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, /* F */ N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1, N1,N1,N1,N1 }; std::string UriDecode(const std::string & sSrc) { // Note from RFC1630: "Sequences which start with a percent sign // but are not followed by two hexadecimal characters (0-9, A-F) are reserved // for future extension" const unsigned char * pSrc = (const unsigned char *)sSrc.c_str(); const size_t SRC_LEN = sSrc.length(); const unsigned char * const SRC_END = pSrc + SRC_LEN; const unsigned char * const SRC_LAST_DEC = SRC_END - 2; // last decodable '%' char * const pStart = new char[SRC_LEN]; // Output will be shorter. char * pEnd = pStart; while (pSrc < SRC_LAST_DEC) { if (*pSrc == '%') { char dec1, dec2; if (N1 != (dec1 = HEX2DEC[*(pSrc + 1)]) && N1 != (dec2 = HEX2DEC[*(pSrc + 2)])) { *pEnd++ = (dec1 << 4) + dec2; pSrc += 3; continue; } } *pEnd++ = *pSrc++; } // the last 2- chars while (pSrc < SRC_END) *pEnd++ = *pSrc++; std::string sResult(pStart, pEnd); delete [] pStart; return sResult; } // Only alphanum and underscore is safe. const char SAFE[256] = { /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ /* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0, /* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,1, // last here is underscore. it's ok. /* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0, /* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; std::string UriEncode(const std::string & sSrc) { const char DEC2HEX[16 + 1] = "0123456789ABCDEF"; const unsigned char * pSrc = (const unsigned char *)sSrc.c_str(); const size_t SRC_LEN = sSrc.length(); unsigned char * const pStart = new unsigned char[SRC_LEN * 3]; unsigned char * pEnd = pStart; const unsigned char * const SRC_END = pSrc + SRC_LEN; for (; pSrc < SRC_END; ++pSrc) { if (SAFE[*pSrc]) *pEnd++ = *pSrc; else { // escape this char *pEnd++ = '%'; *pEnd++ = DEC2HEX[*pSrc >> 4]; *pEnd++ = DEC2HEX[*pSrc & 0x0F]; } } std::string sResult((char *)pStart, (char *)pEnd); delete [] pStart; return sResult; }