// $Id$ // The contents of this file are subject to the BOINC Public License // Version 1.0 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at // http://boinc.berkeley.edu/license_1.0.txt // // Software distributed under the License is distributed on an "AS IS" // basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the // License for the specific language governing rights and limitations // under the License. // // The Original Code is the Berkeley Open Infrastructure for Network Computing. // // The Initial Developer of the Original Code is the SETI@home project. // Portions created by the SETI@home project are Copyright (C) 2002 // University of California at Berkeley. All Rights Reserved. // // Contributor(s): // // Additional routines to help maintain XML compliance. // // Revision History: // $Log$ // Revision 1.21 2004/04/05 22:07:08 korpela // // Segfault problem fixed? // // Revision 1.20 2004/03/06 09:45:25 rwalton // *** empty log message *** // // Revision 1.19 2004/02/05 05:32:22 quarl // *** empty log message *** // // Revision 1.18 2004/01/22 17:57:41 davea // *** empty log message *** // // Revision 1.17 2003/12/01 23:42:05 korpela // Under some compilers template parameters of type char [] weren't getting // cast to char *. Template functions now use &(array[0]) to ensure correct // type is used. // // #ifndef _XML_UTIL_H_ #define _XML_UTIL_H_ #include "config.h" #include #include #include #include #include #include "error_numbers.h" using namespace std; typedef enum tag_xml_encoding { _x_xml_entity=0, _x_xml_cdata, _x_xml_values, _quoted_printable, _base64, _x_base85, _x_setiathome, _x_hex, _x_csv, _x_uuencode, _8bit, _binary } xml_encoding; const char * const xml_encoding_names[]={ "x-xml-entity", "x-xml-cdata", "x-xml-values", "quoted-printable", "base64", "x-base85", "x-setiathome", "x-hex", "x-csv", "x-uuencode", "8bit", "binary" }; #if 0 // the xml_ostream class is an ostream, which can be constructed // from an existing ostream (i.e. cout). When constructed, // an xml header and the opening tag are written. When destructed, // the closing tag is written. class xml_ostream { public: xml_ostream(ostream &o, const char *tag); ~xml_ostream(); template xml_ostream &operator <<(const T &t) { os << t; return *this; }; private: void write_head(); void write_foot(); string my_tag; ostream &os; }; // the xml_ofstream class is an ofstream. When the file is opened, // an xml header and the opening tag are written. Upon close, // the closing tag is written. class xml_ofstream { public: xml_ofstream(); explicit xml_ofstream(const char *filename, const char *tag, ios_base::openmode m=ios_base::out|ios_base::binary); ~xml_ofstream(); void open(const char *p, const char *tag, ios_base::openmode m=ios_base::out|ios_base::binary); void close(); private: void write_head(); void write_foot(); string my_tag; ofstream &os; }; // the xml_istream class is an istream that can be constructed from // an existing istream. When constructed, the stream is read until // the opening tag or end of file is found. This is really only useful // for reading XML from stdin. class xml_istream { public: explicit xml_istream(istream &i, const char *tag=0); ~xml_istream(); operator istream &() {return is;}; private: void seek_head(); string my_tag; istream &is; }; // the xml_ifstream class is an ifstream. When the file is opened, // the file pointer is set after the opening tag. An attempt to // read past the closing tag will fail as if the end of the file has // been reached. If no tag is given, it will assume the first tag // found is the main tag. #ifndef HAVE_STD_POS_TYPE typedef off_t pos_type; #endif #ifndef HAVE_STD_OFF_TYPE typedef off_t off_type; #endif class xml_ifstream { public: xml_ifstream(); explicit xml_ifstream(const char *filename, const char *tag=0, ios_base::openmode m=ios_base::in|ios_base::binary); ~xml_ifstream(); void open(const char *filename, const char *tag=0, ios_base::openmode m=ios_base::in|ios_base::binary); xml_ifstream &seekg(pos_type p); xml_ifstream &seekg(off_type o, ios_base::seekdir d); pos_type tellg(); bool eof(); private: void seek_head(); string my_tag; pos_type xml_start; pos_type xml_end; ifstream &ifs; }; #endif // 0 #define XML_ENCODING "iso-8859-1" static const char * const xml_header= "\n"; // XML entity for tranlation table (not wchar_t compatible) struct xml_entity { unsigned char c; const char *s; }; // change the xml indent level (number of spaces) by adding or subtracting // "i" spaces. return a string of spaces corresponding to the current xml // indent level. string xml_indent(int i=0); static const int XML_MAX_INDENT=40; extern int xml_indent_level; // decode an XML character string. Return a the decoded string in a vector // (null not necessarily a terminator). //template //vector xml_decode_string(const char *input, size_t length=0, // const char *encoding="x_xml_entity"); // do the same thing, but get the length and encoding type from the // xml tag properties. template vector xml_decode_field(const string &input, const char *tag); // encode an XML character string. Return the encoded string. //template //string xml_encode_string(const T *input, size_t n_elements=0, // xml_encoding encoding=_x_xml_entity); template inline string xml_encode_string(const vector &input, xml_encoding encoding=_x_xml_entity) { return xml_encode_string(&(*(input.begin())),input.size(),encoding); } #include #include #include #include extern const char *encode_arr; extern const char *encode_arr85; bool isencchar(char c); bool isencchar85(char c); template string base64_encode(const T *tbin, size_t n_elements) { size_t nbytes=n_elements*sizeof(T); const unsigned char *bin=(const unsigned char *)(tbin); int count=0, offset=0, nleft; const char crlf[]={0xa,0xd,0x0}; string rv(""); rv.reserve(nbytes*4/3+nbytes*2/57); char c[5]; for (nleft = (int)nbytes; nleft > 0; nleft -= 3) { int i; c[0] = (bin[offset]>>2) & 0x3f ; // 6 c[1] = (bin[offset]<<4) & 0x3f | ((bin[offset+1]>>4)&0xf); // 2+4 c[2] = ((bin[offset+1]<<2)&0x3f) | ((bin[offset+2]>>6)&0x3);// 4+2 c[3] = bin[offset+2]&0x3f; // 6 for (i=0;i<((nleft>3)?4:(nleft+1));i++) c[i]=encode_arr[c[i]]; for (;i<4;i++) c[i]='='; rv+=c; offset += 3; count += 4; if (count == 76 ) { count = 0; rv+=crlf; } } rv+=crlf; return rv; } template vector base64_decode(const char *data, size_t nbytes) { const char *p=data,*eol,*eol2; const char cr=0xa,lf=0xd; char in[4],c[3]; int i; vector rv; rv.reserve(nbytes*3/4); while (p<(data+nbytes)) { while (!isencchar(*p)) { *p++; } eol=strchr(p,cr); eol2=strchr(p,lf); if (eol && eol2) { eol=min(eol,eol2); } for (;p<(eol-1);p+=4) { for ( i=0;i<4;i++) { if ((p[i]>='A') && (p[i]<='Z')) { in[i]=p[i]-'A'; } else if ((p[i]>='a') && (p[i]<='z')) { in[i]=p[i]-'a'+26; } else if ((p[i]>='0') && (p[i]<='9')) { in[i]=p[i]-'0'+52; } else { switch (p[i]) { case '+': in[i]=62; break; case '/': in[i]=63; break; default : in[i]=0; } } } c[0]=(in[0]<<2) | ((in[1] >> 4) & 0x3); c[1]=(in[1]<<4) | ((in[2] >> 2) & 0xf); c[2]=(in[2]<<6) | in[3]; for ( i=0;i<3;i++) rv.push_back(c[i]); } } return vector((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } template string base85_encode(const T *tbin, size_t n_elements) { size_t nbytes=n_elements*sizeof(T); const unsigned char *bin=(const unsigned char *)(tbin); int count=0; const char crlf[]={0xa,0xd,0x0}; string rv(""); rv.reserve(nbytes*4/3+nbytes*2/57); char c[6]; int n_pads; unsigned int j=0; while (j74) { rv+=crlf; count=0; } count+=(int)strlen(c); rv+=c; } return rv; } template vector base85_decode(const char *data, size_t nbytes) { const char *p=data,*eol,*eol2; const char cr=0xa,lf=0xd; unsigned long val; int npads; vector rv; rv.reserve(nbytes*4/5); while (p<(data+nbytes)) { while (!isencchar85(*p)) { *p++; } eol=strchr(p,cr); eol2=strchr(p,lf); if (eol && eol2) { eol=min(eol,eol2); } while (p0) { if (p[i]!='_') break; npads++; } for (i=0;i='0') && (p[i]<='9')) { val=p[i]-'0'; } else if ((p[i]>='A') && (p[i]<='Z')) { val=p[i]-'A'+10; } else if ((p[i]>='a') && (p[i]<='y')) { val=p[i]-'a'+36; } else { for (int j=62; j<85; j++) { if (p[i]==encode_arr85[j]) { val=j; j=85; } } } } } rv.push_back(val); } } return vector((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } template string x_setiathome_encode(const T *tbin, size_t n_elements) { size_t nbytes=n_elements*sizeof(T); const unsigned char *bin=(const unsigned char *)(tbin); int count=0, offset=0, nleft; const char cr=0xa; string rv(""); rv.reserve(nbytes*4/3+nbytes*2/48); rv+="\n"; char c[5]; for (nleft = (int)nbytes; nleft > 0; nleft -= 3) { c[0] = bin[offset]&0x3f; // 6 c[1] = (bin[offset]>>6) | (bin[offset+1]<<2)&0x3f; // 2+4 c[2] = ((bin[offset+1]>>4)&0xf) | (bin[offset+2]<<4)&0x3f;// 4+2 c[3] = bin[offset+2]>>2; // 6 for (int i=0;i<4;i++) c[i]+=0x20; c[4]=0; rv+=c; offset += 3; count += 4; if (count == 64) { count = 0; rv+=cr; } } rv+=cr; return rv; } template vector x_setiathome_decode(const char *data, size_t nbytes) { const char *p=data,*eol,*eol2; char in[4],c[3]; int i; vector rv; rv.reserve(nbytes*3/4); while (p<(data+nbytes)) { while ((*p<0x20) || (*p>0x60)){ *p++; } eol=strchr(p,'\n'); eol2=strchr(p,'\r'); if (eol && eol2) { eol=min(eol,eol2); } for (;p<(eol-1);p+=4) { memcpy(in,p,4); for ( i=0;i<4;i++) in[i]-=0x20; c[0]=in[0]&0x3f | in[1]<<6; c[1]=in[1]>>2 | in[2]<<4; c[2]=in[2]>>4 | in[3]<<2; for ( i=0;i<3;i++) rv.push_back(c[i]); } } return vector((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } template string quoted_printable_encode(const T *tbin, size_t n_elements) { size_t nbytes=n_elements*sizeof(T); const unsigned char *bin=(const unsigned char *)(tbin); int line_len=0; const char crlf[]={'=',0xa,0xd,0x0}; string rv(""); rv.reserve(nbytes*4/3+nbytes*2/48); for (size_t i=0;i 74) { rv+=crlf; line_len=1; } rv+=bin[i]; } else { line_len+=3; if (line_len>72) { rv+=crlf; line_len=3; } char buf[4]; sprintf(buf,"=%.2X",bin[i]); rv+=buf; } } return rv; } template vector quoted_printable_decode(const char* data, size_t nbytes) { vector rv; rv.reserve(strlen(data)); size_t i=0; while (i((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } template string x_hex_encode(const T *tbin, size_t n_elements) { size_t nbytes=n_elements*sizeof(T); const unsigned char *bin=(const unsigned char *)(tbin); string rv; int count=0; rv.reserve(nbytes*2+nbytes*2/76); for (unsigned int i=0; i vector x_hex_decode(const char *data, size_t nbytes) { vector rv; rv.reserve(nbytes/2); unsigned int i=0; while (i(c)); } return vector((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } string x_csv_encode_char(const unsigned char *bin, size_t nelements); template string x_csv_encode(const T *bin, size_t nelements) { ostringstream rv(""); long lastlen=0,i; bool ischar=(sizeof(T)==1); rv << endl << xml_indent(2); if (ischar) return x_csv_encode_char((const unsigned char *)bin, nelements); for (i=0;i<(static_cast(nelements)-1);i++) { rv << bin[i] << ','; if ((static_cast(rv.str().size())-lastlen-min(xml_indent_level,XML_MAX_INDENT))>73) { rv << endl << xml_indent(); lastlen=(long)rv.str().size(); } } rv << bin[i] << "\n" << xml_indent(-2); return rv.str(); } template vector x_csv_decode(const char *data, size_t nbytes) { vector rv; while (!isdigit(*data)) { data++; nbytes--; } istringstream in(string(data,nbytes)); bool ischar=(sizeof(T)==1); while (in) { T t; if (!ischar) { in >> t; } else { int i; in >> i; t=i & 0xff; } if (in) rv.push_back(t); char c=' '; while (in && !isdigit(c)) { in.get(c); } if (in) in.putback(c); } return rv; } string encode_char(unsigned char c); unsigned char decode_char(const char *s); template vector x_xml_entity_decode(const char *input, size_t length) { unsigned int i; char c; if (!length) { // We're going to decode until we see a null. Including the null. length=strlen((const char *)input); } vector rv; char *p; rv.reserve(length); for (i=0; i((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } template string x_xml_entity_encode(const T *tbin, size_t n_elements) { size_t length=n_elements*sizeof(T); const unsigned char *input=(const unsigned char *)(tbin); unsigned int i; string rv; rv.reserve(length); for (i=0; i': case '<': case '&': case '\'': case '"': rv+=encode_char(input[i]); break; default: rv+=input[i]; } } else { char buf[16]; sprintf(buf,"&#%.3d;",input[i]); rv+=buf; } } return rv; } template string x_xml_values_encode(const T *bin, size_t n_elements) { ostringstream rv(""); unsigned int i; for (i=0;i vector x_xml_values_decode(const char *data, size_t length) { istringstream r(string(data,length)); vector rv; T t; while (!r.eof()) { r >> t ; rv.push_back(t); while ((isspace(r.peek()) || (r.peek() == ',')) && !r.eof()) { char c; r.get(c); } } return rv; } template string x_xml_cdata_encode(const T *tbin, size_t n_elements) { size_t length=n_elements*sizeof(T); const unsigned char *input=(const unsigned char *)(tbin); unsigned int i; string rv("0x1f) { switch (input[i]) { case ']': if (((length-i)>1) && (input[i+1]==']') && (input[i+2]=='>')) { rv+="&&endcdt;"; } else { rv+=']'; } break; default: rv+=input[i]; } } else { char buf[16]; sprintf(buf,"&&#%.2d;",input[i]); rv+=buf; } } rv+="]]>"; return rv; } template vector x_xml_cdata_decode(const char *input, size_t length) { unsigned int i; char c; if (!length) { // We're going to decode until we see a null. Including the null. length=strlen(input); } vector rv; char *p; rv.reserve(length); for (i=0; i8) && !strncmp((const char *)(input+i),"&&endcdt;",9)) { rv.push_back(']'); rv.push_back(']'); rv.push_back('>'); i+=8; } else { if (input[i+1]=='&') { rv.push_back(c=decode_char(input+i+1)); if ((c!='&') || !strncmp((const char *)(input+i+1),"&",5)) { p=strchr(input+i+1,';'); i=(p-input); } } else { rv.push_back(input[i]); } } } else { rv.push_back(input[i]); } } return vector((T *)(&(rv[0])),(T *)(&(rv[0]))+rv.size()/sizeof(T)); } template vector x_uudecode(const char *data, size_t nbytes) { vector rv; return rv; } template string x_uuencode(const T *data, size_t nbytes) { string rv; return rv; } template vector xml_decode_string(const char *input, size_t length=0, const char *encoding="x_xml_entity") { int i=_x_xml_entity; do { if (!strncmp(encoding,xml_encoding_names[i],strlen(xml_encoding_names[i]))) break; } while (i++ != _binary); switch (i) { case _x_xml_entity: return x_xml_entity_decode(input,length); case _x_xml_cdata: return x_xml_cdata_decode(input,length); case _x_xml_values: return x_xml_values_decode(input,length); case _quoted_printable: return quoted_printable_decode(input,length); case _base64: return base64_decode(input,length); case _x_base85: return base85_decode(input,length); case _x_setiathome: return x_setiathome_decode(input,length); case _x_hex: return x_hex_decode(input,length); case _x_csv: return x_csv_decode(input,length); case _x_uuencode: return x_uudecode(input,length); case _8bit: case _binary: return vector((const T *)input,(const T *)input+length/sizeof(T)); default: return x_xml_entity_decode(input,length); } } template vector xml_decode_field(const string &input, const char *tag) { string start_tag("<"),end_tag("',start)+1; if (!length) { length=(unsigned int)endt - (unsigned int)start; } return (xml_decode_string(&(input[start]),length,encoding)); } template string xml_encode_string(const T *input, size_t length=0, xml_encoding encoding=_x_xml_entity) { switch (encoding) { case _x_xml_entity: return x_xml_entity_encode(input,length); case _x_xml_cdata: return x_xml_cdata_encode(input,length); case _x_xml_values: return x_xml_values_encode(input,length); case _quoted_printable: return quoted_printable_encode(input,length); case _base64: return base64_encode(input,length); case _x_base85: return base85_encode(input,length); case _x_setiathome: return x_setiathome_encode(input,length); case _x_hex: return x_hex_encode(input,length); case _x_csv: return x_csv_encode(input,length); case _x_uuencode: return x_uuencode(input,length); case _8bit: case _binary: return string((const char *)(input),length*sizeof(T)); default: return x_xml_entity_encode(input,length); } } extern bool xml_match_tag(const char*, const char*); extern bool xml_match_tag(const string &, const char*); extern bool extract_xml_record(const string &field, const char *tag, string &record); #endif // // $Log$ // Revision 1.21 2004/04/05 22:07:08 korpela // // Segfault problem fixed? // // Revision 1.20 2004/03/06 09:45:25 rwalton // *** empty log message *** // // Revision 1.19 2004/02/05 05:32:22 quarl // *** empty log message *** // // Revision 1.18 2004/01/22 17:57:41 davea // *** empty log message *** // // Revision 1.17 2003/12/01 23:42:05 korpela // Under some compilers template parameters of type char [] weren't getting // cast to char *. Template functions now use &(array[0]) to ensure correct // type is used. // // Revision 1.16 2003/10/29 20:08:50 korpela // *** empty log message *** // // Revision 1.15 2003/10/27 23:07:34 korpela // *** empty log message *** // // Revision 1.14 2003/10/27 20:07:12 korpela // *** empty log message *** // // Revision 1.13 2003/10/25 18:20:03 korpela // *** empty log message *** // // Revision 1.12 2003/10/24 16:58:11 korpela // *** empty log message *** // // Revision 1.11 2003/10/23 15:39:54 korpela // no message // // Revision 1.10 2003/10/22 23:11:49 davea // *** empty log message *** // // Revision 1.9 2003/10/22 22:36:52 jeffc // jeffc - init xml_encode/decode_string in the definition, not the prototype // // Revision 1.8 2003/10/22 18:13:39 korpela // *** empty log message *** // // Revision 1.7 2003/10/22 17:43:10 korpela // *** empty log message *** // // Revision 1.6 2003/10/22 15:24:10 korpela // *** empty log message *** // // Revision 1.5 2003/10/22 03:09:55 korpela // *** empty log message *** // // Revision 1.4 2003/10/21 18:14:36 korpela // *** empty log message *** // //