mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-14 18:51:28 +00:00
de88a69286
protocols underlying the Web.
729 lines
12 KiB
C
729 lines
12 KiB
C
/*
|
|
* The contents of this file are subject to the Mozilla Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is Web Sniffer.
|
|
*
|
|
* The Initial Developer of the Original Code is Erik van der Poel.
|
|
* Portions created by Erik van der Poel are
|
|
* Copyright (C) 1998,1999,2000 Erik van der Poel.
|
|
* All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
|
|
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "url.h"
#include "utils.h"
|
|
|
|
/*
 * One node of the doubly linked list behind a Stack.  push() links a new
 * node after the current top, so "next" leads toward the top of the stack
 * and "previous" leads toward the bottom.
 */
typedef struct StackEntry StackEntry;

struct StackEntry
{
	unsigned char *str;		/* string carried by this node */
	StackEntry *next;		/* neighbour closer to the top */
	StackEntry *previous;		/* neighbour closer to the bottom */
};
|
|
|
|
typedef struct Stack
|
|
{
|
|
StackEntry *bottom;
|
|
StackEntry *top;
|
|
} Stack;
|
|
|
|
static URL *
|
|
urlAlloc(void)
|
|
{
|
|
URL *result;
|
|
|
|
result = calloc(sizeof(URL), 1);
|
|
if (!result)
|
|
{
|
|
fprintf(stderr, "cannot calloc URL\n");
|
|
exit(0);
|
|
}
|
|
result->port = -1;
|
|
|
|
return result;
|
|
}
|
|
|
|
void
|
|
urlFree(URL *url)
|
|
{
|
|
FREE(url->file);
|
|
FREE(url->fragment);
|
|
FREE(url->host);
|
|
FREE(url->login);
|
|
FREE(url->net_loc);
|
|
FREE(url->params);
|
|
FREE(url->password);
|
|
FREE(url->path);
|
|
FREE(url->pathWithoutFile);
|
|
FREE(url->query);
|
|
FREE(url->scheme);
|
|
FREE(url->url);
|
|
FREE(url);
|
|
}
|
|
|
|
static void
|
|
urlEmbellish(URL *url)
|
|
{
|
|
unsigned char *at;
|
|
unsigned char *colon;
|
|
unsigned char *host;
|
|
unsigned char *login;
|
|
unsigned char *p;
|
|
|
|
p = (unsigned char *) strrchr((char *) url->path, '/');
|
|
if (p)
|
|
{
|
|
FREE(url->pathWithoutFile);
|
|
url->pathWithoutFile = copySizedString(url->path,
|
|
p + 1 - url->path);
|
|
p++;
|
|
}
|
|
else
|
|
{
|
|
p = url->path;
|
|
}
|
|
if (p[0])
|
|
{
|
|
FREE(url->file);
|
|
url->file = copyString(p);
|
|
}
|
|
if (url->net_loc)
|
|
{
|
|
at = (unsigned char *) strchr((char *) url->net_loc, '@');
|
|
if (at)
|
|
{
|
|
login = url->net_loc;
|
|
colon = (unsigned char *) strchr((char *) login, ':');
|
|
if (colon && (colon < at))
|
|
{
|
|
url->password = copySizedString(colon + 1,
|
|
at - colon - 1);
|
|
url->login = copySizedString(login,
|
|
colon - login);
|
|
}
|
|
else
|
|
{
|
|
url->login = copySizedString(login,
|
|
at - login);
|
|
}
|
|
host = at + 1;
|
|
}
|
|
else
|
|
{
|
|
host = url->net_loc;
|
|
}
|
|
colon = (unsigned char *) strchr((char *) host, ':');
|
|
if (colon)
|
|
{
|
|
url->host = lowerCase(copySizedString(host,
|
|
colon - host));
|
|
sscanf((char *) colon + 1, "%d", &url->port);
|
|
}
|
|
else
|
|
{
|
|
FREE(url->host);
|
|
url->host = lowerCase(copyString(host));
|
|
}
|
|
}
|
|
}
|
|
|
|
URL *
|
|
urlParse(const unsigned char *urlStr)
|
|
{
|
|
unsigned char c;
|
|
unsigned char *net_loc;
|
|
unsigned char *p;
|
|
unsigned char *path;
|
|
unsigned char *str;
|
|
URL *url;
|
|
|
|
if ((!urlStr) || (!*urlStr))
|
|
{
|
|
return NULL;
|
|
}
|
|
url = urlAlloc();
|
|
url->url = copyString(urlStr);
|
|
str = copyString(urlStr);
|
|
p = (unsigned char *) strchr((char *) str, '#');
|
|
if (p)
|
|
{
|
|
url->fragment = copyString(p);
|
|
*p = 0;
|
|
}
|
|
p = str;
|
|
c = *p;
|
|
while
|
|
(
|
|
(('a' <= c) && (c <= 'z')) ||
|
|
(('A' <= c) && (c <= 'Z')) ||
|
|
(('0' <= c) && (c <= '9')) ||
|
|
(c == '+') ||
|
|
(c == '.') ||
|
|
(c == '-')
|
|
)
|
|
{
|
|
p++;
|
|
c = *p;
|
|
}
|
|
if ((c == ':') && (p > str))
|
|
{
|
|
url->scheme = lowerCase(copySizedString(str, p - str));
|
|
p++;
|
|
}
|
|
else
|
|
{
|
|
p = str;
|
|
}
|
|
if ((p[0] == '/') && (p[1] == '/'))
|
|
{
|
|
net_loc = p + 2;
|
|
p = (unsigned char *) strchr((char *) net_loc, '/');
|
|
if (p)
|
|
{
|
|
if (p > net_loc)
|
|
{
|
|
url->net_loc = copySizedString(net_loc,
|
|
p - net_loc);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (*net_loc)
|
|
{
|
|
url->net_loc = copyString(net_loc);
|
|
}
|
|
p = (unsigned char *) strchr((char *) net_loc, 0);
|
|
}
|
|
}
|
|
path = p;
|
|
p = (unsigned char *) strchr((char *) p, '?');
|
|
if (p)
|
|
{
|
|
url->query = copyString(p);
|
|
*p = 0;
|
|
}
|
|
p = path;
|
|
p = (unsigned char *) strchr((char *) p, ';');
|
|
if (p)
|
|
{
|
|
url->params = copyString(p);
|
|
*p = 0;
|
|
}
|
|
url->path = copyString(path);
|
|
|
|
urlEmbellish(url);
|
|
|
|
free(str);
|
|
|
|
return url;
|
|
}
|
|
|
|
static unsigned char *
|
|
pop(Stack *stack)
|
|
{
|
|
unsigned char *result;
|
|
StackEntry *top;
|
|
|
|
if (stack->top)
|
|
{
|
|
top = stack->top;
|
|
result = top->str;
|
|
stack->top = top->previous;
|
|
if (stack->top)
|
|
{
|
|
stack->top->next = NULL;
|
|
}
|
|
else
|
|
{
|
|
stack->bottom = NULL;
|
|
}
|
|
free(top);
|
|
}
|
|
else
|
|
{
|
|
result = NULL;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static void
|
|
push(Stack *stack, unsigned char *str)
|
|
{
|
|
StackEntry *entry;
|
|
|
|
entry = calloc(sizeof(StackEntry), 1);
|
|
if (!entry)
|
|
{
|
|
fprintf(stderr, "cannot calloc StackEntry\n");
|
|
exit(0);
|
|
}
|
|
entry->str = str;
|
|
entry->next = NULL;
|
|
entry->previous = stack->top;
|
|
if (stack->top)
|
|
{
|
|
stack->top->next = entry;
|
|
}
|
|
stack->top = entry;
|
|
if (!stack->bottom)
|
|
{
|
|
stack->bottom = entry;
|
|
}
|
|
}
|
|
|
|
static unsigned char *
|
|
bottom(Stack *stack)
|
|
{
|
|
StackEntry *bottom;
|
|
unsigned char *result;
|
|
|
|
bottom = stack->bottom;
|
|
if (bottom)
|
|
{
|
|
result = bottom->str;
|
|
stack->bottom = bottom->next;
|
|
if (stack->bottom)
|
|
{
|
|
stack->bottom->previous = NULL;
|
|
}
|
|
free(bottom);
|
|
}
|
|
else
|
|
{
|
|
result = NULL;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
 * Allocate an empty Stack (both ends NULL).
 *
 * Never returns NULL: when the allocation fails a message is written to
 * stderr and the process terminates with a failure status.
 */
static Stack *
stackAlloc(void)
{
	Stack *stack;

	stack = calloc(1, sizeof *stack);
	if (!stack)
	{
		fprintf(stderr, "cannot calloc Stack\n");
		/* running out of memory is an error, not a successful exit */
		exit(EXIT_FAILURE);
	}

	return stack;
}
|
|
|
|
static void
|
|
stackFree(Stack *stack)
|
|
{
|
|
free(stack);
|
|
}
|
|
|
|
static void
|
|
urlCanonicalizePath(URL *url)
|
|
{
|
|
int absolute;
|
|
unsigned char *begin;
|
|
unsigned char *p;
|
|
unsigned char *slash;
|
|
Stack *stack;
|
|
unsigned char *str;
|
|
|
|
p = url->path;
|
|
if ((!p) || (!*p))
|
|
{
|
|
return;
|
|
}
|
|
if (p[0] == '/')
|
|
{
|
|
absolute = 1;
|
|
p++;
|
|
}
|
|
else
|
|
{
|
|
absolute = 0;
|
|
}
|
|
|
|
stack = stackAlloc();
|
|
while (*p)
|
|
{
|
|
begin = p;
|
|
p = (unsigned char *) strchr((char *) begin, '/');
|
|
if (!p)
|
|
{
|
|
p = (unsigned char *) strchr((char *) begin, 0);
|
|
}
|
|
if (p == begin)
|
|
{
|
|
}
|
|
else if ((p == begin + 1) && (begin[0] == '.'))
|
|
{
|
|
}
|
|
else if
|
|
(
|
|
(p == begin + 2) &&
|
|
(begin[0] == '.') &&
|
|
(begin[1] == '.')
|
|
)
|
|
{
|
|
slash = pop(stack);
|
|
str = pop(stack);
|
|
if (!str)
|
|
{
|
|
push(stack, copyString((unsigned char *) ".."));
|
|
if (*p)
|
|
{
|
|
push(stack, copyString(
|
|
(unsigned char *) "/"));
|
|
}
|
|
}
|
|
else if (!strcmp((char *) str, ".."))
|
|
{
|
|
push(stack, str);
|
|
push(stack, slash);
|
|
push(stack, copyString((unsigned char *) ".."));
|
|
if (*p)
|
|
{
|
|
push(stack, copyString(
|
|
(unsigned char *) "/"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
free(slash);
|
|
free(str);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
push(stack, copySizedString(begin, p - begin));
|
|
if (*p)
|
|
{
|
|
push(stack, copyString((unsigned char *) "/"));
|
|
}
|
|
}
|
|
if (*p)
|
|
{
|
|
p++;
|
|
}
|
|
}
|
|
|
|
if (absolute)
|
|
{
|
|
url->path[0] = '/';
|
|
url->path[1] = 0;
|
|
}
|
|
else
|
|
{
|
|
url->path[0] = 0;
|
|
}
|
|
while (1)
|
|
{
|
|
p = bottom(stack);
|
|
if (p)
|
|
{
|
|
strcat((char *) url->path, (char *) p);
|
|
free(p);
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
stackFree(stack);
|
|
}
|
|
|
|
URL *
|
|
urlRelative(const unsigned char *baseURL, const unsigned char *relativeURL)
|
|
{
|
|
URL *base;
|
|
int len;
|
|
URL *rel;
|
|
unsigned char *tmp;
|
|
|
|
if ((!baseURL) || (!*baseURL))
|
|
{
|
|
return urlParse(relativeURL);
|
|
}
|
|
if ((!relativeURL) || (!*relativeURL))
|
|
{
|
|
return urlParse(baseURL);
|
|
}
|
|
rel = urlParse(relativeURL);
|
|
if (rel->scheme)
|
|
{
|
|
return rel;
|
|
}
|
|
else
|
|
{
|
|
base = urlParse(baseURL);
|
|
if (base->scheme)
|
|
{
|
|
rel->scheme = copyString(base->scheme);
|
|
}
|
|
else
|
|
{
|
|
/* XXX Base is supposed to have scheme. Oh well. */
|
|
return rel;
|
|
}
|
|
}
|
|
if (rel->net_loc)
|
|
{
|
|
goto step7;
|
|
}
|
|
else
|
|
{
|
|
rel->net_loc = copyString(base->net_loc);
|
|
}
|
|
if (rel->path && rel->path[0] == '/')
|
|
{
|
|
goto step7;
|
|
}
|
|
if ((!rel->path) || (!*rel->path))
|
|
{
|
|
FREE(rel->path);
|
|
rel->path = copyString(base->path);
|
|
if (rel->params)
|
|
{
|
|
goto step7;
|
|
}
|
|
rel->params = copyString(base->params);
|
|
if (rel->query)
|
|
{
|
|
goto step7;
|
|
}
|
|
rel->query = copyString(base->query);
|
|
goto step7;
|
|
}
|
|
if (base->pathWithoutFile)
|
|
{
|
|
tmp = rel->path;
|
|
rel->path = appendString(base->pathWithoutFile, rel->path);
|
|
FREE(tmp);
|
|
}
|
|
urlCanonicalizePath(rel);
|
|
|
|
step7:
|
|
len = strlen((char *) rel->scheme);
|
|
len += 1; /* ":" */
|
|
if (rel->net_loc)
|
|
{
|
|
len += 2 + strlen((char *) rel->net_loc); /* "//net_loc" */
|
|
}
|
|
if (rel->path)
|
|
{
|
|
len += strlen((char *) rel->path);
|
|
}
|
|
if (rel->params)
|
|
{
|
|
len += strlen((char *) rel->params);
|
|
}
|
|
if (rel->query)
|
|
{
|
|
len += strlen((char *) rel->query);
|
|
}
|
|
if (rel->fragment)
|
|
{
|
|
len += strlen((char *) rel->fragment);
|
|
}
|
|
FREE(rel->url);
|
|
rel->url = calloc(len + 1, 1);
|
|
if (!rel->url)
|
|
{
|
|
fprintf(stderr, "cannot calloc url\n");
|
|
exit(0);
|
|
}
|
|
strcpy((char *) rel->url, (char *) rel->scheme);
|
|
strcat((char *) rel->url, ":");
|
|
if (rel->net_loc)
|
|
{
|
|
strcat((char *) rel->url, "//");
|
|
strcat((char *) rel->url, (char *) rel->net_loc);
|
|
}
|
|
if (rel->path)
|
|
{
|
|
strcat((char *) rel->url, (char *) rel->path);
|
|
}
|
|
if (rel->params)
|
|
{
|
|
strcat((char *) rel->url, (char *) rel->params);
|
|
}
|
|
if (rel->query)
|
|
{
|
|
strcat((char *) rel->url, (char *) rel->query);
|
|
}
|
|
if (rel->fragment)
|
|
{
|
|
strcat((char *) rel->url, (char *) rel->fragment);
|
|
}
|
|
|
|
urlEmbellish(rel);
|
|
|
|
urlFree(base);
|
|
|
|
return rel;
|
|
}
|
|
|
|
/*
 * Return the value (0-15) of the hexadecimal digit c, or -1 when c is not
 * a hexadecimal digit.
 */
static int
urlHexDigitValue(unsigned char c)
{
	if ((c >= '0') && (c <= '9'))
	{
		return c - '0';
	}
	if ((c >= 'a') && (c <= 'f'))
	{
		return c - 'a' + 10;
	}
	if ((c >= 'A') && (c <= 'F'))
	{
		return c - 'A' + 10;
	}

	return -1;
}

/*
 * Decode the %XX escapes of url in place.
 *
 * Every '%' followed by exactly two hexadecimal digits is replaced by the
 * byte those digits encode.  A '%' that is not followed by two such
 * digits is copied through unchanged along with what follows it, so
 * malformed input is neither misread nor partly swallowed.  The result is
 * never longer than the input.  url may be NULL.
 *
 * "%00" decodes to a zero byte, which ends the string early for anyone
 * treating the result as a C string.
 */
void
urlDecode(unsigned char *url)
{
	unsigned char c;
	int high;
	unsigned char *in;
	int low;
	unsigned char *out;

	if (!url)
	{
		return;
	}

	in = url;
	out = url;
	while ((c = *in++) != 0)
	{
		if (c == '%')
		{
			/* in[1] is only examined once in[0] is known not to be the end */
			high = urlHexDigitValue(in[0]);
			low = (high >= 0) ? urlHexDigitValue(in[1]) : -1;
			if (low >= 0)
			{
				c = (unsigned char) ((high << 4) | low);
				in += 2;
			}
		}
		*out++ = c;
	}
	*out = 0;
}
|
|
|
|
#ifdef URL_TEST
|
|
|
|
/*
 * Base URL against which every entry of relativeURLTests is resolved.
 * Declared as an array of unsigned char so that it can be initialized
 * from a string literal without a pointer-signedness mismatch.
 */
static const unsigned char baseURLTest[] = "http://a/b/c/d;p?q#f";

/*
 * Pairs of { relative URL, expected result of resolving it against
 * baseURLTest }, ended by a single NULL.
 */
static char *relativeURLTests[] =
{
	"g:h",			"g:h",
	"g",			"http://a/b/c/g",
	"./g",			"http://a/b/c/g",
	"g/",			"http://a/b/c/g/",
	"/g",			"http://a/g",
	"//g",			"http://g",
	"?y",			"http://a/b/c/d;p?y",
	"g?y",			"http://a/b/c/g?y",
	"g?y/./x",		"http://a/b/c/g?y/./x",
	"#s",			"http://a/b/c/d;p?q#s",
	"g#s",			"http://a/b/c/g#s",
	"g#s/./x",		"http://a/b/c/g#s/./x",
	"g?y#s",		"http://a/b/c/g?y#s",
	";x",			"http://a/b/c/d;x",
	"g;x",			"http://a/b/c/g;x",
	"g;x?y#s",		"http://a/b/c/g;x?y#s",
	".",			"http://a/b/c/",
	"./",			"http://a/b/c/",
	"..",			"http://a/b/",
	"../",			"http://a/b/",
	"../g",			"http://a/b/g",
	"../..",		"http://a/",
	"../../",		"http://a/",
	"../../g",		"http://a/g",
	"",			"http://a/b/c/d;p?q#f",
	"../../../g",		"http://a/../g",
	"../../../../g",	"http://a/../../g",
	"/./g",			"http://a/./g",
	"/../g",		"http://a/../g",
	"g.",			"http://a/b/c/g.",
	".g",			"http://a/b/c/.g",
	"g..",			"http://a/b/c/g..",
	"..g",			"http://a/b/c/..g",
	"./../g",		"http://a/b/g",
	"./g/.",		"http://a/b/c/g/",
	"g/./h",		"http://a/b/c/g/h",
	"g/../h",		"http://a/b/c/h",
	"http:g",		"http:g",
	"http:",		"http:",
	NULL
};

/* URL that carries a login, a password, a port, a path and a fragment. */
static const unsigned char loginTest[] =
	"ftp://user:password@ftp.domain.com:64000/path1/path2/file#fragment";
|
|
|
|
static void
|
|
printURL(URL *url)
|
|
{
|
|
printf("url %s\n", url->url);
|
|
printf("scheme %s, ", url->scheme ? url->scheme : "NULL");
|
|
printf("login %s, ", url->login ? url->login : "NULL");
|
|
printf("password %s, ", url->password ? url->password : "NULL");
|
|
printf("host %s, ", url->host ? url->host : "NULL");
|
|
printf("port %d, ", url->port);
|
|
printf("path %s, ", url->path ? url->path : "NULL");
|
|
printf("file %s, ", url->file ? url->file : "NULL");
|
|
printf("fragment %s\n", url->fragment ? url->fragment : "NULL");
|
|
printf("======================================\n");
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
int failures;
|
|
char **p;
|
|
int total;
|
|
URL *url;
|
|
|
|
printURL(urlParse(loginTest));
|
|
|
|
failures = 0;
|
|
total = 0;
|
|
|
|
p = relativeURLTests;
|
|
while (p[0])
|
|
{
|
|
total++;
|
|
url = urlRelative(baseURLTest, p[0]);
|
|
if (url)
|
|
{
|
|
if (strcmp((char *) url->url, p[1]))
|
|
{
|
|
failures++;
|
|
printf("urlRelative failed:\n");
|
|
printf("\"%s\" +\n", baseURLTest);
|
|
printf("\"%s\" =\n", p[0]);
|
|
printf("\"%s\"\n", url->url);
|
|
printf("should be:\n");
|
|
printf("\"%s\"\n", p[1]);
|
|
printf("-------------------\n");
|
|
}
|
|
urlFree(url);
|
|
}
|
|
else
|
|
{
|
|
failures++;
|
|
printf("urlRelative return NULL for \"%s\"\n", p[0]);
|
|
printf("----------------------------------\n");
|
|
}
|
|
p += 2;
|
|
}
|
|
printf("%d failures out of %d\n", failures, total);
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif /* URL_TEST */
|