Canonicalize URLs to lower case

This commit is contained in:
Hristo Georgiev 2022-12-02 21:06:05 +02:00 committed by Laurence
parent c0488d63f0
commit 95b5ec0767
3 changed files with 40 additions and 37 deletions

View File

@ -908,43 +908,14 @@ static void handle_project_attach(GUI_RPC_CONN& grc) {
}
}
// remove http(s):// at the beginning of project address
// there is no reason to connect to secure address project
// if we're already connected to the non-secure address
// or vice versa
// also clear last '/' character if present
const string http = "http://";
const string https = "https://";
string new_project_url = url;
size_t pos = new_project_url.find(http);
if (pos != string::npos) {
new_project_url.erase(pos, http.length());
}
else if ((pos = new_project_url.find(https)) != string::npos) {
new_project_url.erase(pos, https.length());
}
if (new_project_url.length() >= 1 && new_project_url[new_project_url.length() - 1] == '/') {
new_project_url.erase(new_project_url.length() - 1, 1);
}
canonicalize_master_url(url);
for (i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
string project_url = p->master_url;
canonicalize_master_url(project_url);
pos = project_url.find(http);
if (pos != string::npos) {
project_url.erase(pos, http.length());
}
else if ((pos = project_url.find(https)) != string::npos) {
project_url.erase(pos, https.length());
}
if (project_url.length() >= 1 && project_url[project_url.length() - 1] == '/') {
project_url.erase(project_url.length() - 1, 1);
}
if (new_project_url == project_url) {
if (url == project_url) {
already_attached = true;
break;
}

View File

@ -203,6 +203,7 @@ void escape_url_readable(char *in, char* out) {
// or prepend it
// - Remove double slashes in the rest
// - Add a trailing slash if necessary
// - Convert the host name (everything up to the first '/' after the
//   protocol) to lower case; the rest of the URL keeps its original case
//
void canonicalize_master_url(char* url, int len) {
char buf[1024];
@ -225,6 +226,11 @@ void canonicalize_master_url(char* url, int len) {
if (buf[n-1] != '/' && (n<sizeof(buf)-2)) {
safe_strcat(buf, "/");
}
for (size_t i=0; i<n-1; i++) {
// stop converting to lower-case, if we've reached the boundary of the domain name
if (buf[i] == '/') break;
buf[i] = tolower(static_cast<unsigned char>(buf[i]));
}
snprintf(url, len, "http%s://%s", (bSSL ? "s" : ""), buf);
url[len-1] = 0;
}

View File

@ -143,30 +143,56 @@ namespace test_url {
}
// Exercises canonicalize_master_url() on already-canonical, scheme-less,
// mixed-case, and malformed inputs, and checks the exact string each one
// is rewritten to. Every mixed-case case mirrors the lower-case case before it.
TEST_F(test_url, canonicalize_master_url) {
//Test to make sure an already good result comes back the same.
string url = "http://secure.example.com/";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "http://secure.example.com/");
//Test to make sure an already good mixed-case result comes back lower-cased.
url = "http://SeCuRe.eXamPle.coM/";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "http://secure.example.com/");
//Test that https works, also adds trailing /.
url = "https://secure.example.com";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "https://secure.example.com/");
//Test that https works, in a mixed-case scenario, also adds trailing /.
url = "https://sEcUre.exaMple.cOm";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "https://secure.example.com/");
//Test if someone forgot the leading http://.
url = "www.example.com/";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "http://www.example.com/");
//Test omitted http:// and mixed case.
url = "wwW.exaMple.cOm/";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "http://www.example.com/");
//Test removing extra slashes and changing socks to https.
url = "socks://sock.example.com////////hello";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "https://sock.example.com/hello/");
//Test removing extra slashes and changing socks to https, in a mixed-case scenario.
//Mixed-case characters after the domain name remain unaffected.
url = "sOcks://Sock.exaMPle.com////////hElLO";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "https://sock.example.com/hElLO/");
//Test invalid protocol: an unrecognized "h://" prefix comes back as http://.
url = "h://bad.example.com/";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "http://bad.example.com/");
//Test invalid protocol and mixed case.
url = "H://baD.exampLE.Com/";
canonicalize_master_url(url);
EXPECT_STREQ(url.c_str(), "http://bad.example.com/");
}
TEST_F(test_url, valid_master_url) {