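Fix xml_unescape handling of several escape sequences: the quote and apostrophe entities were compared with too short a length, and numeric "&#...;" references are now validated before being converted.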

Added unit tests for parse.cpp to prove xml_unescape is working properly.
This commit is contained in:
Keith Uplinger 2019-05-29 15:02:36 -05:00
parent be63e62ede
commit 4928cc9623
2 changed files with 91 additions and 7 deletions

View File

@ -403,7 +403,9 @@ void xml_unescape(char* buf) {
char* out = buf;
char* in = buf;
char* p;
bool goodescape;
while (*in) {
goodescape = false;
if (*in != '&') { // avoid strncmp's if possible
*out++ = *in++;
} else if (!strncmp(in, "<", 4)) {
@ -412,10 +414,10 @@ void xml_unescape(char* buf) {
} else if (!strncmp(in, ">", 4)) {
*out++ = '>';
in += 4;
} else if (!strncmp(in, """, 4)) {
} else if (!strncmp(in, """, 6)) {
*out++ = '"';
in += 6;
} else if (!strncmp(in, "'", 4)) {
} else if (!strncmp(in, "'", 6)) {
*out++ = '\'';
in += 6;
} else if (!strncmp(in, "&", 5)) {
@ -428,14 +430,33 @@ void xml_unescape(char* buf) {
*out++ = '\n';
in += 5;
} else if (!strncmp(in, "&#", 2)) {
//If escape is poorly formed or outside of char size, then print as is.
in += 2;
char c = atoi(in);
*out++ = c;
p = strchr(in, ';');
if (p) {
in = p+1;
if (!p || *in == ';') { //No end semicolon found or it was formatted as &#;
*out++ = '&';
*out++ = '#';
} else {
while (isdigit(*in)) in++;
//Check that escape is formed correctly
for (unsigned int i = 0; i < 4 || i < strlen(in); i++) {
if (!isdigit(*(in + i)) && *(in + i) != ';') {
//Found something other than a single digit.
break;
}
if (*(in + i) == ';') {
goodescape = true;
break;
}
}
int ascii = atoi(in);
if (goodescape && ascii < 256) {
*out++ = ascii;
in = p + 1;
} else {
*out++ = '&';
*out++ = '#';
}
}
} else {
*out++ = *in++;

View File

@ -0,0 +1,63 @@
#include "gtest/gtest.h"
#include "common_defs.h"
#include "url.h"
#include <string>
#include <ios>
using namespace std;
namespace test_parse {
// Test fixture for the parse tests in this file. It holds no state; the
// four hooks below are deliberately empty and serve only as extension points.
class test_parse : public ::testing::Test {
protected:
    // Per-test set-up that fits in a constructor would go here.
    test_parse() {}

    // Per-test clean-up that cannot throw would go here.
    virtual ~test_parse() {}

    // Invoked immediately after the constructor, right before each test.
    virtual void SetUp() {}

    // Invoked immediately after each test, right before the destructor.
    virtual void TearDown() {}

    // Objects declared here would be usable by every test that uses this fixture.
};
// Exercises xml_unescape() with well-formed escapes, with truncated entity
// names, and with numeric references that are expected to be left untouched.
TEST_F(test_parse, xml_unescape) {
    // Case 1: named entities, the CR/LF hex references in both letter cases,
    // and one decimal reference; all are expected to be decoded.
    string escaped = "&lt;&gt;&quot;&apos;&amp;&#xD;&#xd;&#xA;&#xa;&#75;";
    const string decoded = "<>\"\'&\r\r\n\nK";
    xml_unescape(escaped);
    EXPECT_EQ(escaped, decoded);

    // Case 2: partial entity names. Guards against a too-short strncmp
    // length accepting them, as the earlier comparisons did.
    string truncated = "&quoYIKES&apoBOO";
    const string truncated_expected = "&quoYIKES&apoBOO";
    xml_unescape(truncated);
    EXPECT_EQ(truncated, truncated_expected);

    // Case 3: numeric references that should not be converted (a non-digit
    // in the number, no closing ';', an empty "&#;", and a four-digit value).
    // The text is expected to come back unchanged.
    string bad_numeric = "&#9s3;&#694312532&#;eq&#1234;&#75";
    const string bad_numeric_expected = "&#9s3;&#694312532&#;eq&#1234;&#75";
    xml_unescape(bad_numeric);
    EXPECT_EQ(bad_numeric, bad_numeric_expected);
}
} // namespace