// The contents of this file are subject to the BOINC Public License // Version 1.0 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at // http://boinc.berkeley.edu/license_1.0.txt // // Software distributed under the License is distributed on an "AS IS" // basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the // License for the specific language governing rights and limitations // under the License. // // The Original Code is the Berkeley Open Infrastructure for Network Computing. // // The Initial Developer of the Original Code is the SETI@home project. // Portions created by the SETI@home project are Copyright (C) 2002 // University of California at Berkeley. All Rights Reserved. // // Contributor(s): // #include #include #include #include "xml_util.h" // Most of these entries are for reverse translation of poorly written HTML. // Forward translation doesn't translate most printable characters. const xml_entity xml_trans[]= { { 0x07, "&bel;" }, { 0x0a, "&lf;" }, { 0x0d, "&cr;" }, { ' ', "&sp;" }, { '!', "!" }, { '\"', """ }, { '\"', "&dquot;" }, { '#', "#" }, { '$', "$" }, { '%', "%" }, { '&', "&" }, { '\'', "'" }, { '(', "(" }, { ')', ")" }, { '*', "*" }, { '+', "+" }, { ',', "," }, { '-', "‐" }, { '-', "−" }, { '.', "." }, { '/', "/" }, { ':', ":" }, { ';', ";" }, { '<', "<" }, { '=', "=" }, { '>', ">" }, { '?', "?" }, { '@', "@" }, { '[', "[" }, { '\\', "\" }, { ']', "]" }, { '^', "ˆ" }, { '_', "_" }, { '_', "―" }, { '`', "`" }, { '{', "{" }, { '|', "|" }, { '}', "}" }, { '~', "˜" }, { 0x82, "‚" }, { 0x84, "„" }, { 0x85, "&ldots;" }, { 0x8a, "Š" }, { 0x8b, "‹" }, { 0x8c, "Œ" }, { 0x91, "‘" }, { 0x91, "’" }, { 0x92, "’" }, { 0x93, "“" }, { 0x93, "”" }, { 0x94, "”" }, { 0x95, "•" }, { 0x96, "–" }, { 0x96, "&endash;" }, { 0x97, "—" }, { 0x97, "&emdash;" }, { 0xa0, " " }, { 0xa1, "¡" }, { 0xa2, "¢" }, { 0xa3, "£" }, { 0xa4, "¤" }, { 0xa5, "¥" }, { 0xa6, "¦" }, { 0xa7, "§" }, { 0xa8, "¨" }, { 0xa9, "©" }, { 0xaa, "ª" }, { 0xab, "«" }, { 0xac, "¬" }, { 0xad, "­" }, { 0xae, "®" }, { 0xaf, "¯" }, { 0xb0, "°" }, { 0xb1, "±" }, { 0xb2, "²" }, { 0xb3, "³" }, { 0xb4, "´" }, { 0xb5, "µ" }, { 0xb6, "¶" }, { 0xb7, "·" }, { 0xb8, "¸" }, { 0xb9, "¹" }, { 0xba, "º" }, { 0xbb, "»" }, { 0xbc, "¼" }, { 0xbd, "½" }, { 0xbe, "¾" }, { 0xbf, "¿" }, { 0xc0, "À" }, { 0xc1, "Á" }, { 0xc2, "Â" }, { 0xc3, "Ã" }, { 0xc4, "Ä" }, { 0xc5, "Å" }, { 0xc6, "Æ" }, { 0xc7, "Ç" }, { 0xc8, "È" }, { 0xc9, "É" }, { 0xca, "Ê" }, { 0xcb, "Ë" }, { 0xcc, "Ì" }, { 0xcd, "Í" }, { 0xce, "Î" }, { 0xcf, "Ï" }, { 0xd0, "Ð" }, { 0xd1, "Ñ" }, { 0xd2, "Ò" }, { 0xd3, "Ó" }, { 0xd4, "Ô" }, { 0xd5, "Õ" }, { 0xd6, "Ö" }, { 0xd7, "×" }, { 0xd8, "Ø" }, { 0xd9, "Ù" }, { 0xda, "Ú" }, { 0xdb, "Û" }, { 0xdc, "Ü" }, { 0xdd, "Ý" }, { 0xde, "Þ" }, { 0xdf, "ß" }, { 0xe0, "à" }, { 0xe1, "á" }, { 0xe2, "â" }, { 0xe3, "ã" }, { 0xe4, "ä" }, { 0xe5, "å" }, { 0xe6, "æ" }, { 0xe7, "ç" }, { 0xe8, "è" }, { 0xe9, "é" }, { 0xea, "ê" }, { 0xeb, "ë" }, { 0xec, "ì" }, { 0xed, "í" }, { 0xee, "î" }, { 0xef, "ï" }, { 0xf0, "ð" }, { 0xf1, "ñ" }, { 0xf2, "ò" }, { 0xf3, "ó" }, { 0xf4, "ô" }, { 0xf5, "õ" }, { 0xf6, "ö" }, { 0xf7, "÷" }, { 0xf8, "ø" }, { 0xf9, "ù" }, { 0xfa, "ú" }, { 0xfb, "û" }, { 0xfc, "ü" }, { 0xfd, "ý" }, { 0xfe, "þ" }, { 0xff, "ÿ" }, { 0x00, 0 } }; #ifdef HAVE_MAP #include std::multimap encode_map; std::map decode_map; void populate_encode_map() { int i=0; do { encode_map.insert(std::make_pair(xml_trans[i].c,xml_trans[i].s)); } while (xml_trans[++i].s); } void populate_decode_map() { int i=0; do { decode_map[xml_trans[i].s]=xml_trans[i].c; } while (xml_trans[++i].s); } #endif std::string encode_char(unsigned char c) { #ifdef HAVE_MAP if (!(encode_map.size())) populate_encode_map(); std::multimap::iterator p=encode_map.find(c); if (p!=encode_map.end()) { return (p->second); } else { #else int i=0; while (xml_trans[i].s) { if (xml_trans[i].c == c) return std::string(xml_trans[i].s); i++; } { #endif char buf[16]; sprintf(buf,"&#%.3d;",static_cast(c)); #ifdef HAVE_MAP encode_map.insert(std::make_pair(c,buf)); #endif return std::string(buf); } } unsigned char decode_char(const unsigned char *s) { char code[32]; int i=0; while (*s && (*s != ';')) { code[i]=*s; s++; i++; } code[i]=';'; code[i+1]=0; #ifdef HAVE_MAP if (!(decode_map.size())) populate_decode_map(); std::map::iterator p=decode_map.find(code); if (p!=decode_map.end()) { return (p->second); } else { #else while (xml_trans[i].s) { if (!strcmp(xml_trans[i].s,(const char *)(&code[0]))) return xml_trans[i].c; i++; } { #endif if (code[1]=='#') { sscanf((const char *)(code+2),"%d",&i); #ifdef HAVE_MAP decode_map.insert(std::make_pair(code,static_cast(i&0xff))); #endif } else { fprintf(stderr,"Unknown XML entity \"%s\"\n",code); i='&'; } return static_cast(i&0xff); } } std::vector xml_decode_string(const unsigned char *input, size_t length=0) { unsigned int i; char c; if (!length) { // We're going to decode until we see a null. Including the null. length=strlen((const char *)input)+1; } std::vector rv; unsigned char *p; rv.reserve(length); for (i=0; i': case '<': case '&': case '\'': case '"': rv+=encode_char(input[i]); break; default: rv+=input[i]; } } else { char buf[16]; sprintf(buf,"&#%.3d",input[i]); rv+=buf; } } return rv; }