2011-11-03 02:35:04 +00:00
|
|
|
// This file is part of BOINC.
|
|
|
|
// http://boinc.berkeley.edu
|
|
|
|
// Copyright (C) 2009 University of California
|
|
|
|
//
|
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
|
|
//
|
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
#if defined(_WIN32) && !defined(__STDWX_H__)
|
|
|
|
#include "boinc_win.h"
|
|
|
|
#elif defined(_WIN32) && defined(__STDWX_H__)
|
|
|
|
#include "stdwx.h"
|
|
|
|
#else
|
|
|
|
#include "config.h"
|
|
|
|
#include <string>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
2012-07-02 18:51:02 +00:00
|
|
|
#include <stdlib.h>
|
2011-11-03 02:35:04 +00:00
|
|
|
#endif
|
2013-05-28 06:08:02 +00:00
|
|
|
#ifdef _MSC_VER
|
|
|
|
#define snprintf _snprintf
|
|
|
|
#endif
|
2011-11-03 02:35:04 +00:00
|
|
|
|
|
|
|
#include "str_util.h"
|
|
|
|
#include "str_replace.h"
|
|
|
|
|
2012-08-11 05:47:18 +00:00
|
|
|
#include "url.h"
|
|
|
|
|
2011-11-03 02:35:04 +00:00
|
|
|
using std::string;
|
|
|
|
|
|
|
|
// Break a URL down into its protocol, server, port and file components
|
|
|
|
// URL format:
|
|
|
|
// [{http|https|socks}://][user[:passwd]@]host.dom.dom[:port][/dir/file]
|
|
|
|
//
|
|
|
|
void parse_url(const char* url, PARSED_URL& purl) {
|
|
|
|
char* p, *q, *buf;
|
|
|
|
char _buf[256];
|
|
|
|
|
|
|
|
// strip off the protocol if present
|
|
|
|
//
|
|
|
|
if (strncmp(url, "http://", 7) == 0) {
|
|
|
|
safe_strcpy(_buf, url+7);
|
|
|
|
purl.protocol = URL_PROTOCOL_HTTP;
|
|
|
|
} else if (strncmp(url, "https://", 8) == 0) {
|
|
|
|
safe_strcpy(_buf, url+8);
|
|
|
|
purl.protocol = URL_PROTOCOL_HTTPS;
|
|
|
|
} else if (strncmp(url, "socks://", 8) == 0) {
|
|
|
|
safe_strcpy(_buf, url+8);
|
|
|
|
purl.protocol = URL_PROTOCOL_SOCKS;
|
|
|
|
} else {
|
|
|
|
safe_strcpy(_buf, url);
|
|
|
|
purl.protocol = URL_PROTOCOL_UNKNOWN;
|
|
|
|
}
|
|
|
|
buf = _buf;
|
|
|
|
|
|
|
|
// parse user name and password
|
|
|
|
//
|
|
|
|
strcpy(purl.user, "");
|
|
|
|
strcpy(purl.passwd, "");
|
|
|
|
p = strchr(buf, '@');
|
|
|
|
if (p) {
|
|
|
|
*p = 0;
|
|
|
|
q = strchr(buf, ':');
|
|
|
|
if (q) {
|
|
|
|
*q = 0;
|
2013-06-04 03:24:48 +00:00
|
|
|
safe_strcpy(purl.user, buf);
|
|
|
|
safe_strcpy(purl.passwd, q+1);
|
2011-11-03 02:35:04 +00:00
|
|
|
} else {
|
2013-06-04 03:24:48 +00:00
|
|
|
safe_strcpy(purl.user, buf);
|
2011-11-03 02:35:04 +00:00
|
|
|
}
|
|
|
|
buf = p+1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse and strip off file part if present
|
|
|
|
//
|
|
|
|
p = strchr(buf, '/');
|
|
|
|
if (p) {
|
2013-06-04 03:24:48 +00:00
|
|
|
safe_strcpy(purl.file, p+1);
|
2011-11-03 02:35:04 +00:00
|
|
|
*p = 0;
|
|
|
|
} else {
|
2013-06-04 03:24:48 +00:00
|
|
|
safe_strcpy(purl.file, "");
|
2011-11-03 02:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// parse and strip off port if present
|
|
|
|
//
|
|
|
|
p = strchr(buf,':');
|
|
|
|
if (p) {
|
|
|
|
purl.port = atol(p+1);
|
|
|
|
*p = 0;
|
|
|
|
} else {
|
|
|
|
// CMC note: if they didn't pass in a port #,
|
|
|
|
// but the url starts with https://, assume they
|
|
|
|
// want a secure port (HTTPS, port 443)
|
|
|
|
purl.port = (purl.protocol == URL_PROTOCOL_HTTPS) ? 443 : 80;
|
|
|
|
}
|
|
|
|
|
|
|
|
// what remains is the host
|
|
|
|
//
|
2013-06-04 03:24:48 +00:00
|
|
|
safe_strcpy(purl.host, buf);
|
2011-11-03 02:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static char x2c(char *what) {
|
|
|
|
register char digit;
|
|
|
|
|
|
|
|
digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A')+10 : (what[0] - '0'));
|
|
|
|
digit *= 16;
|
|
|
|
digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A')+10 : (what[1] - '0'));
|
|
|
|
return(digit);
|
|
|
|
}
|
|
|
|
|
|
|
|
void c2x(char *what) {
|
|
|
|
char buf[3];
|
|
|
|
char num = atoi(what);
|
|
|
|
char d1 = num / 16;
|
|
|
|
char d2 = num % 16;
|
|
|
|
int abase1, abase2;
|
|
|
|
if (d1 < 10) abase1 = 48;
|
|
|
|
else abase1 = 55;
|
|
|
|
if (d2 < 10) abase2 = 48;
|
|
|
|
else abase2 = 55;
|
|
|
|
buf[0] = d1+abase1;
|
|
|
|
buf[1] = d2+abase2;
|
|
|
|
buf[2] = 0;
|
|
|
|
|
|
|
|
strcpy(what, buf);
|
|
|
|
}
|
|
|
|
|
2013-05-28 06:08:02 +00:00
|
|
|
// The following functions do "URL-escaping", i.e. escaping GET arguments
|
|
|
|
// to be passed in a URL
|
2011-11-03 02:35:04 +00:00
|
|
|
|
2013-05-28 06:08:02 +00:00
|
|
|
// size not really needed since unescaping can only shrink
|
|
|
|
//
|
|
|
|
void unescape_url(char *url, int url_size) {
|
2011-11-03 02:35:04 +00:00
|
|
|
int x,y;
|
|
|
|
|
|
|
|
for (x=0,y=0; url[y] && (x<url_size);++x,++y) {
|
|
|
|
if ((url[x] = url[y]) == '%') {
|
|
|
|
url[x] = x2c(&url[y+1]);
|
|
|
|
y+=2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
url[x] = '\0';
|
|
|
|
}
|
|
|
|
|
2013-05-28 06:08:02 +00:00
|
|
|
// unescape_url needs to be able to handle potentially hostile URLs
|
|
|
|
//
|
2011-11-03 02:35:04 +00:00
|
|
|
void unescape_url(string& url) {
|
|
|
|
char buf[1024];
|
2013-05-28 06:08:02 +00:00
|
|
|
strlcpy(buf, url.c_str(), sizeof(buf));
|
|
|
|
unescape_url(buf, sizeof(buf));
|
2011-11-03 02:35:04 +00:00
|
|
|
url = buf;
|
|
|
|
}
|
|
|
|
|
2013-05-28 06:08:02 +00:00
|
|
|
void escape_url(const char *in, char*out, int out_size) {
|
2011-11-03 02:35:04 +00:00
|
|
|
int x, y;
|
2013-05-28 06:08:02 +00:00
|
|
|
for (x=0, y=0; in[x] && (y<out_size-3); ++x) {
|
2011-11-03 02:35:04 +00:00
|
|
|
if (isalnum(in[x])) {
|
|
|
|
out[y] = in[x];
|
|
|
|
++y;
|
|
|
|
} else {
|
|
|
|
out[y] = '%';
|
|
|
|
++y;
|
|
|
|
out[y] = 0;
|
|
|
|
char buf[256];
|
|
|
|
sprintf(buf, "%d", (char)in[x]);
|
|
|
|
c2x(buf);
|
|
|
|
strcat(out, buf);
|
|
|
|
y += 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out[y] = 0;
|
|
|
|
}
|
|
|
|
|
2013-05-28 06:08:02 +00:00
|
|
|
// escape_url needs to be able to handle potentially hostile URLs
|
|
|
|
//
|
2011-11-03 02:35:04 +00:00
|
|
|
void escape_url(string& url) {
|
|
|
|
char buf[1024];
|
2013-05-28 06:08:02 +00:00
|
|
|
escape_url(url.c_str(), buf, sizeof(buf));
|
2011-11-03 02:35:04 +00:00
|
|
|
url = buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Escape a URL for the project directory, cutting off the "http://",
|
|
|
|
// converting everthing other than alphanumbers, ., - and _ to "_".
|
|
|
|
//
|
|
|
|
void escape_url_readable(char *in, char* out) {
|
|
|
|
int x, y;
|
|
|
|
char *temp;
|
|
|
|
|
|
|
|
temp = strstr(in,"://");
|
|
|
|
if (temp) {
|
|
|
|
in = temp + strlen("://");
|
|
|
|
}
|
|
|
|
for (x=0, y=0; in[x]; ++x) {
|
|
|
|
if (isalnum(in[x]) || in[x]=='.' || in[x]=='-' || in[x]=='_') {
|
|
|
|
out[y] = in[x];
|
|
|
|
++y;
|
|
|
|
} else {
|
|
|
|
out[y] = '_';
|
|
|
|
++y;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out[y] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Canonicalize a master url.
|
|
|
|
// - Convert the first part of a URL (before the "://") to http://,
|
|
|
|
// or prepend it
|
|
|
|
// - Remove double slashes in the rest
|
|
|
|
// - Add a trailing slash if necessary
|
|
|
|
//
|
2013-05-28 06:08:02 +00:00
|
|
|
void canonicalize_master_url(char* url, int len) {
|
2011-11-03 02:35:04 +00:00
|
|
|
char buf[1024];
|
|
|
|
size_t n;
|
|
|
|
bool bSSL = false; // keep track if they sent in https://
|
|
|
|
|
|
|
|
char *p = strstr(url, "://");
|
|
|
|
if (p) {
|
|
|
|
bSSL = (bool) (p == url + 5);
|
2013-05-28 06:08:02 +00:00
|
|
|
strlcpy(buf, p+3, sizeof(buf));
|
2011-11-03 02:35:04 +00:00
|
|
|
} else {
|
2013-05-28 06:08:02 +00:00
|
|
|
strlcpy(buf, url, sizeof(buf));
|
2011-11-03 02:35:04 +00:00
|
|
|
}
|
|
|
|
while (1) {
|
|
|
|
p = strstr(buf, "//");
|
|
|
|
if (!p) break;
|
|
|
|
strcpy_overlap(p, p+1);
|
|
|
|
}
|
|
|
|
n = strlen(buf);
|
2013-05-28 06:08:02 +00:00
|
|
|
if (buf[n-1] != '/' && (n<sizeof(buf)-2)) {
|
2011-11-03 02:35:04 +00:00
|
|
|
strcat(buf, "/");
|
|
|
|
}
|
2013-05-28 06:08:02 +00:00
|
|
|
snprintf(url, len, "http%s://%s", (bSSL ? "s" : ""), buf);
|
|
|
|
url[len-1] = 0;
|
2011-11-03 02:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void canonicalize_master_url(string& url) {
|
|
|
|
char buf[1024];
|
2013-06-04 03:24:48 +00:00
|
|
|
safe_strcpy(buf, url.c_str());
|
2013-05-28 06:08:02 +00:00
|
|
|
canonicalize_master_url(buf, sizeof(buf));
|
2011-11-03 02:35:04 +00:00
|
|
|
url = buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
// is the string a valid master URL, in canonical form?
|
|
|
|
//
|
|
|
|
bool valid_master_url(char* buf) {
|
|
|
|
char* p, *q;
|
|
|
|
size_t n;
|
|
|
|
bool bSSL = false;
|
|
|
|
|
|
|
|
p = strstr(buf, "http://");
|
|
|
|
if (p != buf) {
|
|
|
|
// allow https
|
|
|
|
p = strstr(buf, "https://");
|
|
|
|
if (p == buf) {
|
|
|
|
bSSL = true;
|
|
|
|
} else {
|
|
|
|
return false; // no http or https, it's bad!
|
|
|
|
}
|
|
|
|
}
|
|
|
|
q = p+strlen(bSSL ? "https://" : "http://");
|
|
|
|
p = strstr(q, ".");
|
|
|
|
if (!p) return false;
|
|
|
|
if (p == q) return false;
|
|
|
|
q = p+1;
|
|
|
|
p = strstr(q, "/");
|
|
|
|
if (!p) return false;
|
|
|
|
if (p == q) return false;
|
|
|
|
n = strlen(buf);
|
|
|
|
if (buf[n-1] != '/') return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void escape_project_url(char *in, char* out) {
|
|
|
|
escape_url_readable(in, out);
|
|
|
|
char& last = out[strlen(out)-1];
|
|
|
|
// remove trailing _
|
|
|
|
if (last == '_') {
|
|
|
|
last = '\0';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-28 21:06:49 +00:00
|
|
|
bool is_https(const char* url) {
|
|
|
|
return (strncmp(url, "https://", 8) == 0);
|
|
|
|
}
|