mirror of https://github.com/BOINC/boinc.git
320 lines
7.4 KiB
C++
320 lines
7.4 KiB
C++
|
// The contents of this file are subject to the BOINC Public License
|
||
|
// Version 1.0 (the "License"); you may not use this file except in
|
||
|
// compliance with the License. You may obtain a copy of the License at
|
||
|
// http://boinc.berkeley.edu/license_1.0.txt
|
||
|
//
|
||
|
// Software distributed under the License is distributed on an "AS IS"
|
||
|
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||
|
// License for the specific language governing rights and limitations
|
||
|
// under the License.
|
||
|
//
|
||
|
// The Original Code is the Berkeley Open Infrastructure for Network Computing.
|
||
|
//
|
||
|
// The Initial Developer of the Original Code is the SETI@home project.
|
||
|
// Portions created by the SETI@home project are Copyright (C) 2002
|
||
|
// University of California at Berkeley. All Rights Reserved.
|
||
|
//
|
||
|
// Contributor(s):
|
||
|
//
|
||
|
|
||
|
#include <cctype>
#include <cstring>
#include <string>
#include <vector>
#include "xml_util.h"
|
||
|
|
||
|
// Most of these entries are for reverse translation of poorly written HTML.
|
||
|
// Forward translation doesn't translate most printable characters.
|
||
|
// Translation table between raw bytes and named entities.
//
// Each row pairs a byte value with one entity name.  A byte may appear in
// several rows; the first row for a byte is the one a forward (encoding)
// table scan finds, the remaining rows exist only so that alternative
// spellings can be decoded.  The table ends with a row whose name is null.
//
// NOTE(review): the entity names below were restored from a listing in which
// they had been rendered as the characters they stand for (leaving invalid
// literals such as """ and "\").  Names follow the standard HTML entity set;
// lsquor/ldquor/rsquor/rdquor are the most likely spellings for the
// alternate quote rows -- confirm against the upstream repository.
const xml_entity xml_trans[]= {
    { 0x07, "&bel;" },
    { 0x0a, "&lf;" },
    { 0x0d, "&cr;" },
    { ' ', "&sp;" },
    { '!', "&excl;" },
    { '\"', "&quot;" },
    { '\"', "&dquot;" },
    { '#', "&num;" },
    { '$', "&dollar;" },
    { '%', "&percnt;" },
    { '&', "&amp;" },
    { '\'', "&apos;" },
    { '(', "&lpar;" },
    { ')', "&rpar;" },
    { '*', "&ast;" },
    { '+', "&plus;" },
    { ',', "&comma;" },
    { '-', "&hyphen;" },
    { '-', "&minus;" },
    { '.', "&period;" },
    { '/', "&sol;" },
    { ':', "&colon;" },
    { ';', "&semi;" },
    { '<', "&lt;" },
    { '=', "&equals;" },
    { '>', "&gt;" },
    { '?', "&quest;" },
    { '@', "&commat;" },
    { '[', "&lsqb;" },
    { '\\', "&bsol;" },
    { ']', "&rsqb;" },
    { '^', "&circ;" },
    { '_', "&lowbar;" },
    { '_', "&horbar;" },
    { '`', "&grave;" },
    { '{', "&lcub;" },
    { '|', "&verbar;" },
    { '}', "&rcub;" },
    { '~', "&tilde;" },
    { 0x82, "&lsquor;" },
    { 0x84, "&ldquor;" },
    { 0x85, "&ldots;" },
    { 0x8a, "&Scaron;" },
    { 0x8b, "&lsaquo;" },
    { 0x8c, "&OElig;" },
    { 0x91, "&lsquo;" },
    { 0x91, "&rsquor;" },
    { 0x92, "&rsquo;" },
    { 0x93, "&ldquo;" },
    { 0x93, "&rdquor;" },
    { 0x94, "&rdquo;" },
    { 0x95, "&bull;" },
    { 0x96, "&ndash;" },
    { 0x96, "&endash;" },
    { 0x97, "&mdash;" },
    { 0x97, "&emdash;" },
    { 0xa0, "&nbsp;" },
    { 0xa1, "&iexcl;" },
    { 0xa2, "&cent;" },
    { 0xa3, "&pound;" },
    { 0xa4, "&curren;" },
    { 0xa5, "&yen;" },
    { 0xa6, "&brvbar;" },
    { 0xa7, "&sect;" },
    { 0xa8, "&uml;" },
    { 0xa9, "&copy;" },
    { 0xaa, "&ordf;" },
    { 0xab, "&laquo;" },
    { 0xac, "&not;" },
    { 0xad, "&shy;" },
    { 0xae, "&reg;" },
    { 0xaf, "&macr;" },
    { 0xb0, "&deg;" },
    { 0xb1, "&plusmn;" },
    { 0xb2, "&sup2;" },
    { 0xb3, "&sup3;" },
    { 0xb4, "&acute;" },
    { 0xb5, "&micro;" },
    { 0xb6, "&para;" },
    { 0xb7, "&middot;" },
    { 0xb8, "&cedil;" },
    { 0xb9, "&sup1;" },
    { 0xba, "&ordm;" },
    { 0xbb, "&raquo;" },
    { 0xbc, "&frac14;" },
    { 0xbd, "&frac12;" },
    { 0xbe, "&frac34;" },
    { 0xbf, "&iquest;" },
    { 0xc0, "&Agrave;" },
    { 0xc1, "&Aacute;" },
    { 0xc2, "&Acirc;" },
    { 0xc3, "&Atilde;" },
    { 0xc4, "&Auml;" },
    { 0xc5, "&Aring;" },
    { 0xc6, "&AElig;" },
    { 0xc7, "&Ccedil;" },
    { 0xc8, "&Egrave;" },
    { 0xc9, "&Eacute;" },
    { 0xca, "&Ecirc;" },
    { 0xcb, "&Euml;" },
    { 0xcc, "&Igrave;" },
    { 0xcd, "&Iacute;" },
    { 0xce, "&Icirc;" },
    { 0xcf, "&Iuml;" },
    { 0xd0, "&ETH;" },
    { 0xd1, "&Ntilde;" },
    { 0xd2, "&Ograve;" },
    { 0xd3, "&Oacute;" },
    { 0xd4, "&Ocirc;" },
    { 0xd5, "&Otilde;" },
    { 0xd6, "&Ouml;" },
    { 0xd7, "&times;" },
    { 0xd8, "&Oslash;" },
    { 0xd9, "&Ugrave;" },
    { 0xda, "&Uacute;" },
    { 0xdb, "&Ucirc;" },
    { 0xdc, "&Uuml;" },
    { 0xdd, "&Yacute;" },
    { 0xde, "&THORN;" },
    { 0xdf, "&szlig;" },
    { 0xe0, "&agrave;" },
    { 0xe1, "&aacute;" },
    { 0xe2, "&acirc;" },
    { 0xe3, "&atilde;" },
    { 0xe4, "&auml;" },
    { 0xe5, "&aring;" },
    { 0xe6, "&aelig;" },
    { 0xe7, "&ccedil;" },
    { 0xe8, "&egrave;" },
    { 0xe9, "&eacute;" },
    { 0xea, "&ecirc;" },
    { 0xeb, "&euml;" },
    { 0xec, "&igrave;" },
    { 0xed, "&iacute;" },
    { 0xee, "&icirc;" },
    { 0xef, "&iuml;" },
    { 0xf0, "&eth;" },
    { 0xf1, "&ntilde;" },
    { 0xf2, "&ograve;" },
    { 0xf3, "&oacute;" },
    { 0xf4, "&ocirc;" },
    { 0xf5, "&otilde;" },
    { 0xf6, "&ouml;" },
    { 0xf7, "&divide;" },
    { 0xf8, "&oslash;" },
    { 0xf9, "&ugrave;" },
    { 0xfa, "&uacute;" },
    { 0xfb, "&ucirc;" },
    { 0xfc, "&uuml;" },
    { 0xfd, "&yacute;" },
    { 0xfe, "&thorn;" },
    { 0xff, "&yuml;" },
    { 0x00, 0 }
};
|
||
|
|
||
|
#ifdef HAVE_MAP
|
||
|
#include <map>
|
||
|
|
||
|
// Byte -> entity text lookup used by encode_char().  A multimap because
// xml_trans may list several entity names for the same byte.  Starts empty;
// encode_char() calls populate_encode_map() when it finds it empty.
std::multimap<unsigned char,const char *> encode_map;
|
||
|
// Entity text -> byte lookup used by decode_char().  Starts empty;
// decode_char() calls populate_decode_map() when it finds it empty.
std::map<std::string, unsigned char> decode_map;
|
||
|
|
||
|
void populate_encode_map() {
|
||
|
int i=0;
|
||
|
do {
|
||
|
encode_map.insert(std::make_pair(xml_trans[i].c,xml_trans[i].s));
|
||
|
} while (xml_trans[++i].s);
|
||
|
}
|
||
|
|
||
|
void populate_decode_map() {
|
||
|
int i=0;
|
||
|
do {
|
||
|
decode_map[xml_trans[i].s]=xml_trans[i].c;
|
||
|
} while (xml_trans[++i].s);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
|
||
|
std::string encode_char(unsigned char c) {
|
||
|
#ifdef HAVE_MAP
|
||
|
if (!(encode_map.size())) populate_encode_map();
|
||
|
std::multimap<unsigned char,const char *>::iterator p=encode_map.find(c);
|
||
|
if (p!=encode_map.end()) {
|
||
|
return (p->second);
|
||
|
} else {
|
||
|
#else
|
||
|
int i=0;
|
||
|
while (xml_trans[i].s) {
|
||
|
if (xml_trans[i].c == c) return std::string(xml_trans[i].s);
|
||
|
i++;
|
||
|
}
|
||
|
{
|
||
|
#endif
|
||
|
char buf[16];
|
||
|
sprintf(buf,"&#%.3d;",static_cast<int>(c));
|
||
|
#ifdef HAVE_MAP
|
||
|
encode_map.insert(std::make_pair(c,buf));
|
||
|
#endif
|
||
|
return std::string(buf);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
unsigned char decode_char(const unsigned char *s) {
|
||
|
char code[32];
|
||
|
int i=0;
|
||
|
while (*s && (*s != ';')) {
|
||
|
code[i]=*s;
|
||
|
s++;
|
||
|
i++;
|
||
|
}
|
||
|
code[i]=';';
|
||
|
code[i+1]=0;
|
||
|
#ifdef HAVE_MAP
|
||
|
if (!(decode_map.size())) populate_decode_map();
|
||
|
std::map<std::string,unsigned char>::iterator p=decode_map.find(code);
|
||
|
if (p!=decode_map.end()) {
|
||
|
return (p->second);
|
||
|
} else {
|
||
|
#else
|
||
|
while (xml_trans[i].s) {
|
||
|
if (!strcmp(xml_trans[i].s,(const char *)(&code[0]))) return xml_trans[i].c;
|
||
|
i++;
|
||
|
}
|
||
|
{
|
||
|
#endif
|
||
|
if (code[1]=='#') {
|
||
|
sscanf((const char *)(code+2),"%d",&i);
|
||
|
#ifdef HAVE_MAP
|
||
|
decode_map.insert(std::make_pair(code,static_cast<unsigned char>(i&0xff)));
|
||
|
#endif
|
||
|
} else {
|
||
|
fprintf(stderr,"Unknown XML entity \"%s\"\n",code);
|
||
|
i='&';
|
||
|
}
|
||
|
return static_cast<unsigned char>(i&0xff);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::vector<unsigned char> xml_decode_string(const unsigned char *input, size_t length=0) {
|
||
|
unsigned int i;
|
||
|
char c;
|
||
|
if (!length) {
|
||
|
// We're going to decode until we see a null. Including the null.
|
||
|
length=strlen((const char *)input)+1;
|
||
|
}
|
||
|
std::vector<unsigned char> rv;
|
||
|
unsigned char *p;
|
||
|
rv.reserve(length);
|
||
|
for (i=0; i<length; i++) {
|
||
|
if (input[i]=='&') {
|
||
|
rv.push_back(c=decode_char(input+i));
|
||
|
if ((c!='&') || strncmp((const char *)(input+i),"&",5)) {
|
||
|
p=(unsigned char *)strchr((const char *)(input+i),';');
|
||
|
i=(p-input);
|
||
|
}
|
||
|
} else {
|
||
|
rv.push_back(input[i]);
|
||
|
}
|
||
|
}
|
||
|
return rv;
|
||
|
}
|
||
|
|
||
|
std::string xml_encode_string(const unsigned char *input, size_t length=0) {
|
||
|
unsigned int i;
|
||
|
if (!length) {
|
||
|
// This is bad form. Are you sure there are no nulls in the input?
|
||
|
length=strlen((const char *)input)+1;
|
||
|
}
|
||
|
std::string rv;
|
||
|
rv.reserve(length);
|
||
|
for (i=0; i<length; i++) {
|
||
|
if (isprint(input[i])) {
|
||
|
switch (input[i]) {
|
||
|
case '>':
|
||
|
case '<':
|
||
|
case '&':
|
||
|
case '\'':
|
||
|
case '"':
|
||
|
rv+=encode_char(input[i]);
|
||
|
break;
|
||
|
default:
|
||
|
rv+=input[i];
|
||
|
}
|
||
|
} else {
|
||
|
char buf[16];
|
||
|
sprintf(buf,"&#%.3d",input[i]);
|
||
|
rv+=buf;
|
||
|
}
|
||
|
}
|
||
|
return rv;
|
||
|
}
|
||
|
|
||
|
|