From 4928cc9623f6d02a56ddf95aca38a09275c91415 Mon Sep 17 00:00:00 2001 From: Keith Uplinger Date: Wed, 29 May 2019 15:02:36 -0500 Subject: [PATCH] Fix to xml_unescape function for multiple escape values. Added unit tests for parse.cpp to prove xml_unescape is working properly. --- lib/parse.cpp | 35 ++++++++++++---- tests/unit-tests/lib/test_parse.cpp | 63 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 7 deletions(-) create mode 100644 tests/unit-tests/lib/test_parse.cpp diff --git a/lib/parse.cpp b/lib/parse.cpp index 6146d26ab7..5727c6d15f 100644 --- a/lib/parse.cpp +++ b/lib/parse.cpp @@ -403,7 +403,9 @@ void xml_unescape(char* buf) { char* out = buf; char* in = buf; char* p; + bool goodescape; while (*in) { + goodescape = false; if (*in != '&') { // avoid strncmp's if possible *out++ = *in++; } else if (!strncmp(in, "<", 4)) { @@ -412,10 +414,10 @@ void xml_unescape(char* buf) { } else if (!strncmp(in, ">", 4)) { *out++ = '>'; in += 4; - } else if (!strncmp(in, """, 4)) { + } else if (!strncmp(in, """, 6)) { *out++ = '"'; in += 6; - } else if (!strncmp(in, "'", 4)) { + } else if (!strncmp(in, "'", 6)) { *out++ = '\''; in += 6; } else if (!strncmp(in, "&", 5)) { @@ -428,14 +430,33 @@ void xml_unescape(char* buf) { *out++ = '\n'; in += 5; } else if (!strncmp(in, "&#", 2)) { + //If the numeric escape is malformed or its value is 256 or more, emit the two characters &# literally and keep scanning after them. NOTE(review): the value 0 is accepted further down and writes a NUL byte into the buffer; confirm that is intended. in += 2; - char c = atoi(in); - *out++ = c; p = strchr(in, ';'); - if (p) { - in = p+1; + if (!p || *in == ';') { //No semicolon in the rest of the buffer, or the escape is empty (&#;) + *out++ = '&'; + *out++ = '#'; } else { - while (isdigit(*in)) in++; + //Check that the escape is digits followed by a semicolon. NOTE(review): because the two bounds are joined with ||, the i < 4 limit never restricts the scan and strlen is re-evaluated on every pass; presumably && was intended, confirm. + for (unsigned int i = 0; i < 4 || i < strlen(in); i++) { + if (!isdigit(*(in + i)) && *(in + i) != ';') { + //Found a character that is neither a digit nor a semicolon, so the escape is not well formed.
+ break; + } + if (*(in + i) == ';') { + goodescape = true; + break; + } + } + int ascii = atoi(in); + + if (goodescape && ascii < 256) { + *out++ = ascii; + in = p + 1; + } else { + *out++ = '&'; + *out++ = '#'; + } } } else { *out++ = *in++; diff --git a/tests/unit-tests/lib/test_parse.cpp b/tests/unit-tests/lib/test_parse.cpp new file mode 100644 index 0000000000..1a63546377 --- /dev/null +++ b/tests/unit-tests/lib/test_parse.cpp @@ -0,0 +1,63 @@ +#include "gtest/gtest.h" +#include "common_defs.h" +#include "url.h" +#include +#include + +using namespace std; + +namespace test_parse { + + // Fixture for the parse.cpp unit tests. + + class test_parse : public ::testing::Test { + protected: + // You can remove any or all of the following functions if their bodies + // are empty. + + test_parse() { + // You can do set-up work for each test here. + } + + virtual ~test_parse() { + // You can do clean-up work that doesn't throw exceptions here. + } + + // If the constructor and destructor are not enough for setting up + // and cleaning up each test, you can define the following methods: + + virtual void SetUp() { + // Code here will be called immediately after the constructor (right + // before each test). + } + + virtual void TearDown() { + // Code here will be called immediately after each test (right + // before the destructor). + } + + // Objects declared here can be used by all tests in the test_parse test case. + }; + + // Tests xml_unescape on well-formed, partial and malformed escapes. NOTE(review): the calls pass a std::string, so they rely on a string overload of xml_unescape that is not part of this patch; confirm it is declared by the included headers. + + TEST_F(test_parse, xml_unescape) { + string test = "<>"'& K"; + string answer = "<>\"\'&\r\r\n\nK"; + xml_unescape(test); + EXPECT_EQ(test, answer); + + //Note: partial names such as &quo and &apo must be left unchanged; the old 4-character strncmp compares matched them. + test = "&quoYIKES&apoBOO"; + answer = "&quoYIKES&apoBOO"; + xml_unescape(test); + EXPECT_EQ(test, answer); + + //Testing the numeric (ASCII code) conversion, including unknown or invalid forms. + test = " s3;�&#;eqӒK"; + answer = " s3;�&#;eqӒK"; + xml_unescape(test); + EXPECT_EQ(test, answer); + } + +} // namespace