2005-01-20 23:22:22 +00:00
|
|
|
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2005 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-05-17 22:33:57 +00:00
|
|
|
// A very crude interface for parsing XML files;
// assumes all elements are either single-line or
// have start and end tags on separate lines.
// This is meant to be used ONLY for parsing XML files produced
// by the BOINC scheduling server or client.
// Could replace this with a more general parser.
|
|
|
|
|
2005-07-14 16:46:38 +00:00
|
|
|
#if defined(_WIN32) && !defined(__STDWX_H__) && !defined(_BOINC_WIN_) && !defined(_AFX_STDAFX_H_)
|
|
|
|
#include "boinc_win.h"
|
|
|
|
#endif
|
|
|
|
|
2004-03-04 11:41:43 +00:00
|
|
|
#ifndef _WIN32
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cstring>
|
|
|
|
#include <cstdlib>
|
2003-06-16 19:06:08 +00:00
|
|
|
#include <string>
|
2005-09-17 21:28:25 +00:00
|
|
|
#include <math.h>
|
2005-09-20 04:36:26 +00:00
|
|
|
#if HAVE_IEEEFP_H
|
|
|
|
#include <ieeefp.h>
|
|
|
|
#endif
|
2004-03-04 11:41:43 +00:00
|
|
|
#endif
|
2004-03-03 19:00:42 +00:00
|
|
|
|
2002-07-11 01:09:53 +00:00
|
|
|
#include "error_numbers.h"
|
2007-02-21 16:26:51 +00:00
|
|
|
#include "str_util.h"
|
2003-03-06 00:42:18 +00:00
|
|
|
#include "parse.h"
|
2004-01-22 02:01:09 +00:00
|
|
|
|
2004-04-30 23:18:56 +00:00
|
|
|
#ifdef _USING_FCGI_
|
|
|
|
#include "fcgi_stdio.h"
|
|
|
|
#endif
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2004-06-30 22:16:26 +00:00
|
|
|
using std::string;
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2005-08-12 18:31:47 +00:00
|
|
|
|
|
|
|
// Parse a boolean; tag is of form "foobar"
|
2007-01-23 19:14:56 +00:00
|
|
|
// Accept either <foobar/> or <foobar>0|1</foobar>
|
2005-08-12 18:31:47 +00:00
|
|
|
//
|
|
|
|
bool parse_bool(const char* buf, const char* tag, bool& result) {
|
|
|
|
char single_tag[256], start_tag[256];
|
|
|
|
int x;
|
|
|
|
|
|
|
|
sprintf(single_tag, "<%s/>", tag);
|
|
|
|
if (match_tag(buf, single_tag)) {
|
|
|
|
result = true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
sprintf(start_tag, "<%s>", tag);
|
|
|
|
if (parse_int(buf, start_tag, x)) {
|
|
|
|
result = (x != 0);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2003-11-02 23:08:06 +00:00
|
|
|
// parse a string of the form ...<tag attrs>string</tag>...;
|
2003-10-18 19:35:58 +00:00
|
|
|
// returns the "string" part.
|
2004-02-02 21:09:05 +00:00
|
|
|
// Does XML unescaping (replace < with <)
|
2004-01-21 07:07:16 +00:00
|
|
|
// "string" may not include '<'
|
2003-11-02 23:08:06 +00:00
|
|
|
// Strips white space from ends.
|
|
|
|
// Use "<tag", not "<tag>", if there might be attributes
|
2002-09-22 23:27:14 +00:00
|
|
|
//
|
2006-04-17 22:41:29 +00:00
|
|
|
bool parse_str(const char* buf, const char* tag, char* dest, int destlen) {
|
2004-02-02 21:09:05 +00:00
|
|
|
string str;
|
2005-07-06 09:14:43 +00:00
|
|
|
const char* p;
|
2006-04-17 22:41:29 +00:00
|
|
|
char tempbuf[1024];
|
|
|
|
int len;
|
2004-11-28 11:31:54 +00:00
|
|
|
|
|
|
|
// sanity check on NULL and empty cases.
|
|
|
|
if (!buf || !tag || !strlen(tag))
|
2005-01-13 01:00:46 +00:00
|
|
|
return false;
|
2004-11-28 11:31:54 +00:00
|
|
|
|
|
|
|
p = strstr(buf, tag);
|
2003-10-03 06:46:22 +00:00
|
|
|
if (!p) return false;
|
|
|
|
p = strchr(p, '>');
|
2006-04-18 17:55:14 +00:00
|
|
|
p++;
|
2005-07-06 09:14:43 +00:00
|
|
|
const char* q = strchr(p, '<');
|
2003-10-03 06:46:22 +00:00
|
|
|
if (!q) return false;
|
2006-04-17 22:41:29 +00:00
|
|
|
len = (int)(q-p);
|
|
|
|
if (len >= destlen) len = destlen-1;
|
2006-04-18 17:55:14 +00:00
|
|
|
memcpy(tempbuf, p, len);
|
2006-04-17 22:41:29 +00:00
|
|
|
tempbuf[len] = 0;
|
|
|
|
strip_whitespace(tempbuf);
|
|
|
|
xml_unescape(tempbuf, dest);
|
2004-02-02 21:09:05 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-04-17 22:41:29 +00:00
|
|
|
bool parse_str(const char* buf, const char* tag, string& dest) {
|
|
|
|
char tempbuf[1024];
|
|
|
|
if (!parse_str(buf, tag, tempbuf, 1024)) return false;
|
|
|
|
dest = tempbuf;
|
2002-04-30 22:22:54 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
// parse a string of the form 'xxx name="value" xxx';
// returns value in dest (a buffer of "len" bytes).
// The value is taken from between the first pair of double quotes
// following the first occurrence of "name" in buf,
// and is truncated to len-1 characters.
// dest is set to the empty string if name or the quotes are missing.
// Nothing is written if dest is NULL or len <= 0.
//
void parse_attr(const char* buf, const char* name, char* dest, int len) {
    if (!dest || len <= 0) return;
    dest[0] = 0;
    if (!buf || !name) return;

    const char* p = strstr(buf, name);
    if (!p) return;
    p = strchr(p, '"');
    if (!p) return;
    const char* q = strchr(p+1, '"');
    if (!q) return;

    // copy the characters strictly between the two quotes
    size_t n = (size_t)(q - (p+1));
    if (n > (size_t)len - 1) n = (size_t)len - 1;
    memcpy(dest, p+1, n);
    dest[n] = 0;
}
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
// copy everything remaining on "in" to "out", 1024 bytes at a time.
// Stops at the first short read (EOF or read error)
// or when a write does not complete.
//
void copy_stream(FILE* in, FILE* out) {
    char buf[1024];

    while (1) {
        size_t n = fread(buf, 1, sizeof(buf), in);
        if (n > 0 && fwrite(buf, 1, n, out) != n) {
            break;      // write error: no point reading further
        }
        if (n < sizeof(buf)) break;
    }
}
|
|
|
|
|
2002-09-22 23:27:14 +00:00
|
|
|
// append to a malloc'd string
|
|
|
|
//
|
2006-03-02 22:51:41 +00:00
|
|
|
int strcatdup(char*& p, char* buf) {
|
2002-07-05 05:33:40 +00:00
|
|
|
p = (char*)realloc(p, strlen(p) + strlen(buf)+1);
|
|
|
|
if (!p) {
|
2006-03-02 22:51:41 +00:00
|
|
|
return ERR_MALLOC;
|
2002-07-05 05:33:40 +00:00
|
|
|
}
|
|
|
|
strcat(p, buf);
|
2006-03-02 22:51:41 +00:00
|
|
|
return 0;
|
2002-07-05 05:33:40 +00:00
|
|
|
}
|
|
|
|
|
2002-09-22 23:27:14 +00:00
|
|
|
// copy from a file to a malloc'd string until the end tag is reached
|
|
|
|
//
|
2003-06-17 01:03:45 +00:00
|
|
|
int dup_element_contents(FILE* in, const char* end_tag, char** pp) {
|
2002-06-21 06:52:47 +00:00
|
|
|
char buf[256];
|
2006-03-02 22:51:41 +00:00
|
|
|
int retval;
|
2002-07-31 05:59:43 +00:00
|
|
|
|
2002-07-05 05:33:40 +00:00
|
|
|
char* p = strdup("");
|
2002-06-21 06:52:47 +00:00
|
|
|
while (fgets(buf, 256, in)) {
|
|
|
|
if (strstr(buf, end_tag)) {
|
|
|
|
*pp = p;
|
|
|
|
return 0;
|
|
|
|
}
|
2006-03-02 22:51:41 +00:00
|
|
|
retval = strcatdup(p, buf);
|
|
|
|
if (retval) return retval;
|
2002-06-21 06:52:47 +00:00
|
|
|
}
|
2003-10-21 04:06:55 +00:00
|
|
|
return ERR_XML_PARSE;
|
2002-06-21 06:52:47 +00:00
|
|
|
}
|
2002-07-07 20:39:24 +00:00
|
|
|
|
2003-02-11 00:52:44 +00:00
|
|
|
// copy from a file to static buffer
|
|
|
|
//
|
2003-06-17 01:03:45 +00:00
|
|
|
int copy_element_contents(FILE* in, const char* end_tag, char* p, int len) {
|
2003-02-11 00:52:44 +00:00
|
|
|
char buf[256];
|
2004-09-22 21:08:26 +00:00
|
|
|
int n;
|
2003-02-11 00:52:44 +00:00
|
|
|
|
|
|
|
strcpy(p, "");
|
|
|
|
while (fgets(buf, 256, in)) {
|
|
|
|
if (strstr(buf, end_tag)) {
|
|
|
|
return 0;
|
|
|
|
}
|
2004-10-25 20:16:30 +00:00
|
|
|
n = (int)strlen(buf);
|
2004-09-22 21:08:26 +00:00
|
|
|
if (n >= len-1) return ERR_XML_PARSE;
|
2003-02-11 00:52:44 +00:00
|
|
|
strcat(p, buf);
|
2004-09-22 21:08:26 +00:00
|
|
|
len -= n;
|
2003-02-11 00:52:44 +00:00
|
|
|
}
|
2003-10-21 04:06:55 +00:00
|
|
|
return ERR_XML_PARSE;
|
2003-02-11 00:52:44 +00:00
|
|
|
}
|
|
|
|
|
2003-11-28 19:19:11 +00:00
|
|
|
int copy_element_contents(FILE* in, const char* end_tag, string& str) {
|
|
|
|
char buf[256];
|
|
|
|
|
|
|
|
str = "";
|
|
|
|
while (fgets(buf, 256, in)) {
|
|
|
|
if (strstr(buf, end_tag)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
str += buf;
|
|
|
|
}
|
|
|
|
return ERR_XML_PARSE;
|
|
|
|
}
|
|
|
|
|
2005-08-05 22:00:19 +00:00
|
|
|
// read the rest of a file into a string (any old contents are discarded).
// Uses fread() with an explicit byte count, so data following
// an embedded NUL byte is not lost.
//
void file_to_str(FILE* in, string& str) {
    char buf[256];
    size_t n;

    str = "";
    while ((n = fread(buf, 1, sizeof(buf), in)) > 0) {
        str.append(buf, n);
    }
}
|
2003-11-28 19:19:11 +00:00
|
|
|
|
2002-09-22 23:27:14 +00:00
|
|
|
// read a file into a malloc'd string
|
|
|
|
//
|
2003-06-17 01:03:45 +00:00
|
|
|
int read_file_malloc(const char* pathname, char*& str) {
|
2002-07-07 20:39:24 +00:00
|
|
|
char buf[256];
|
|
|
|
FILE* f;
|
2006-03-02 22:51:41 +00:00
|
|
|
int retval;
|
2002-07-07 20:39:24 +00:00
|
|
|
|
|
|
|
f = fopen(pathname, "r");
|
2003-10-21 04:06:55 +00:00
|
|
|
if (!f) return ERR_FOPEN;
|
2002-07-07 20:39:24 +00:00
|
|
|
str = strdup("");
|
|
|
|
while (fgets(buf, 256, f)) {
|
2006-03-02 22:51:41 +00:00
|
|
|
retval = strcatdup(str, buf);
|
|
|
|
if (retval) return retval;
|
2002-07-07 20:39:24 +00:00
|
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
return 0;
|
|
|
|
}
|
2002-10-14 23:10:12 +00:00
|
|
|
|
2002-12-20 02:12:27 +00:00
|
|
|
|
2004-09-13 18:05:54 +00:00
|
|
|
// replace XML element contents
// The text between the first occurrence of "start" and the following
// occurrence of "end" is replaced, in place, by "replacement".
// If either marker is missing, buf is left unchanged.
// The caller must make sure buf has room for the result;
// "replacement" must not point into buf.
//
void replace_element_contents(
    char* buf, const char* start, const char* end, const char* replacement
) {
    char* p = strstr(buf, start);
    if (!p) return;
    p += strlen(start);
    char* q = strstr(p, end);
    if (!q) return;

    // shift the tail (end marker onwards, including the NUL) to its
    // final position, then drop the replacement into the gap.
    // memmove because source and destination overlap.
    //
    size_t rlen = strlen(replacement);
    memmove(p + rlen, q, strlen(q) + 1);
    memcpy(p, replacement, rlen);
}
|
2002-12-20 02:12:27 +00:00
|
|
|
|
2004-09-27 19:44:40 +00:00
|
|
|
// if the string contains a substring of the form X...Y,
// remove the first such.
// X is "start" and Y is "end"; both markers are removed along with
// everything between them.
// Returns true if something was removed, false if no such substring exists.
//
bool remove_element(char* buf, const char* start, const char* end) {
    char* p, *q;

    p = strstr(buf, start);
    if (!p) return false;
    q = strstr(p+strlen(start), end);
    if (!q) return false;
    q += strlen(end);

    // source and destination overlap, so strcpy would be undefined here
    memmove(p, q, strlen(q)+1);
    return true;
}
|
|
|
|
|
2004-09-13 18:05:54 +00:00
|
|
|
// replace a substring.  Do at most one instance.
// The first occurrence of "substr" in "str" is replaced, in place,
// by "replacement".  Returns false if substr does not occur.
// The caller must make sure str has room for the result;
// "replacement" must not point into str.
//
bool str_replace(char* str, const char* substr, const char* replacement) {
    char* p = strstr(str, substr);
    if (!p) return false;

    // move the tail straight to its final place instead of staging it
    // in a fixed-size temporary, which a long tail would overflow
    //
    size_t rlen = strlen(replacement);
    char* tail = p + strlen(substr);
    memmove(p + rlen, tail, strlen(tail)+1);
    memcpy(p, replacement, rlen);
    return true;
}
|
|
|
|
|
2003-03-06 00:42:18 +00:00
|
|
|
// if the given XML has an element of the form
// <venue name="venue_name">
//   ...
// </venue>
// then return the contents of that element.
// Otherwise strip out all <venue> elements
// (a <venue> element with no closing tag is dropped
// together with everything after it).
// The result is written to "out";
// the caller must make sure it is large enough (strlen(in)+1 suffices).
//
void extract_venue(const char* in, const char* venue_name, char* out) {
    const char* p, *q;
    size_t n;

    // built as a std::string so that a long venue name can't overflow
    // a fixed-size buffer
    //
    std::string open_tag = std::string("<venue name=\"") + venue_name + "\">";

    p = strstr(in, open_tag.c_str());
    if (p) {
        // prefs contain the specified venue
        //
        p += open_tag.size();
        q = strstr(p, "</venue");
        n = q ? (size_t)(q - p) : strlen(p);
        memmove(out, p, n);
        out[n] = 0;
    } else {
        // prefs don't contain the specified venue
        //
        char* wp = out;     // write position in out
        q = in;
        while (1) {
            p = strstr(q, "<venue");
            // copy the text up to the next <venue, or all the rest
            n = p ? (size_t)(p - q) : strlen(q);
            memmove(wp, q, n);
            wp += n;
            if (!p) break;
            q = strstr(p, "</venue>");
            if (!q) break;
            q += strlen("</venue>");
        }
        *wp = 0;
    }
}
|
2003-05-20 00:03:39 +00:00
|
|
|
|
|
|
|
// copy a line from the given string.
// kinda like fgets() when you're reading from a string
// The line (without its '\n') is copied to buf, truncated to len-1
// characters, and "in" is advanced past the '\n'.
// Returns buf, or NULL (leaving "in" alone) if no '\n' remains.
// Nothing is written to buf if len <= 0.
// The source string is only read, never modified.
//
char* sgets(char* buf, int len, char*& in) {
    char* p = strchr(in, '\n');
    if (!p) return NULL;
    if (len > 0) {
        size_t n = (size_t)(p - in);
        if (n > (size_t)len - 1) n = (size_t)len - 1;
        memcpy(buf, in, n);
        buf[n] = 0;
    }
    in = p+1;
    return buf;
}
|
2003-06-16 19:06:08 +00:00
|
|
|
|
2006-04-17 22:41:29 +00:00
|
|
|
// NOTE: these used to take std::string instead of char* args.
// But this performed poorly.
//
// XML-escape "in" into "out":
//   '<' becomes "&lt;", '&' becomes "&amp;",
//   bytes above 127 and the control characters 9, 10 and 13
//   become numeric references ("&#N;"),
//   all other control characters (below 32) are dropped,
//   everything else is copied unchanged.
// NOTE: output buffer should be 6X size of input
//
void xml_escape(const char* in, char* out) {
    char* p = out;

    for (; *in; in++) {
        int x = (int) *in;
        x &= 0xff;   // just in case
        if (x == '<') {
            strcpy(p, "&lt;");
            p += 4;
        } else if (x == '&') {
            strcpy(p, "&amp;");
            p += 5;
        } else if (x>127) {
            p += sprintf(p, "&#%d;", x);
        } else if (x<32) {
            switch(x) {
            case 9:
            case 10:
            case 13:
                p += sprintf(p, "&#%d;", x);
                break;
            }
        } else {
            *p++ = x;
        }
    }
    *p = 0;
}
|
|
|
|
|
2006-06-01 19:59:57 +00:00
|
|
|
// Undo XML escaping of "in" into "out":
//   "&lt;" becomes '<', "&amp;" becomes '&',
//   "&#N;" becomes the character with code N (as parsed by atoi),
//   everything else is copied unchanged.
// A numeric reference with no terminating ';' ends the conversion
// after its character has been emitted.
// output buffer need not be larger than input
//
void xml_unescape(const char* in, char* out) {
    char* p = out;
    while (*in) {
        if (!strncmp(in, "&lt;", 4)) {
            *p++ = '<';
            in += 4;
        } else if (!strncmp(in, "&amp;", 5)) {
            *p++ = '&';
            in += 5;
        } else if (!strncmp(in, "&#", 2)) {
            in += 2;
            *p++ = (char)atoi(in);
            in = strchr(in, ';');
            if (!in) break;     // unterminated reference: nothing more to scan
            in++;
        } else {
            *p++ = *in++;
        }
    }
    *p = 0;
}
|
2004-08-11 23:52:22 +00:00
|
|
|
|
2005-05-31 21:59:29 +00:00
|
|
|
// we got an unrecognized line.
|
|
|
|
// If it has two <'s (e.g. <foo>xx</foo>) return 0.
|
|
|
|
// If it's of the form <foo> then scan for </foo> and return 0.
|
|
|
|
// Otherwise return ERR_XML_PARSE
|
|
|
|
//
|
|
|
|
int skip_unrecognized(char* buf, FILE* in) {
|
|
|
|
char* p, *q, buf2[256];
|
|
|
|
std::string close_tag;
|
|
|
|
|
|
|
|
p = strchr(buf, '<');
|
|
|
|
if (!p) {
|
|
|
|
return ERR_XML_PARSE;
|
|
|
|
}
|
|
|
|
if (strchr(p+1, '<')) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
q = strchr(p+1, '>');
|
|
|
|
if (!q) {
|
|
|
|
return ERR_XML_PARSE;
|
|
|
|
}
|
|
|
|
*q = 0;
|
|
|
|
close_tag = string("</") + string(p+1) + string(">");
|
|
|
|
while (fgets(buf2, 256, in)) {
|
|
|
|
if (strstr(buf2, close_tag.c_str())) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
return ERR_XML_PARSE;
|
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2006-08-22 21:52:44 +00:00
|
|
|
// Bind the parser to the input it will read from.
// Only the pointer is stored.
//
XML_PARSER::XML_PARSER(MIOFILE* _f) {
    this->f = _f;
}
|
|
|
|
|
|
|
|
// read until find non-whitespace char.
|
|
|
|
// Return the char in the reference param
|
|
|
|
// Return true iff reached EOF
|
2006-06-13 20:27:35 +00:00
|
|
|
//
|
2006-08-21 22:25:21 +00:00
|
|
|
bool XML_PARSER::scan_nonws(int& first_char) {
|
|
|
|
int c;
|
|
|
|
while (1) {
|
2006-09-27 16:50:47 +00:00
|
|
|
c = f->_getc();
|
2006-08-21 22:25:21 +00:00
|
|
|
if (c == EOF) return true;
|
|
|
|
if (isspace(c)) continue;
|
|
|
|
first_char = c;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// we just read a <; read until we find a >,
|
2006-11-08 15:38:06 +00:00
|
|
|
// and copy intervening text (except spaces) to buf.
|
2006-08-21 22:25:21 +00:00
|
|
|
// Return true iff reached EOF
|
2006-11-08 15:38:06 +00:00
|
|
|
// TODO: parse attributes too
|
2006-05-25 05:22:10 +00:00
|
|
|
//
|
2006-08-23 03:34:30 +00:00
|
|
|
bool XML_PARSER::scan_tag(char* buf, int len) {
|
2006-08-21 22:25:21 +00:00
|
|
|
int c;
|
|
|
|
while (1) {
|
2006-09-27 16:50:47 +00:00
|
|
|
c = f->_getc();
|
2006-08-21 22:25:21 +00:00
|
|
|
if (c == EOF) return true;
|
2006-11-08 15:38:06 +00:00
|
|
|
if (isspace(c)) continue;
|
2006-08-21 22:25:21 +00:00
|
|
|
if (c == '>') {
|
|
|
|
*buf = 0;
|
|
|
|
return false;
|
|
|
|
}
|
2006-08-23 03:34:30 +00:00
|
|
|
if (--len > 0) {
|
|
|
|
*buf++ = c;
|
|
|
|
}
|
2006-08-21 22:25:21 +00:00
|
|
|
}
|
|
|
|
}
|
2006-05-25 05:22:10 +00:00
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// read and copy text to buf; stop when find a <;
|
|
|
|
// ungetc() that so we read it again
|
|
|
|
// Return true iff reached EOF
|
|
|
|
//
|
2006-08-23 03:34:30 +00:00
|
|
|
bool XML_PARSER::copy_until_tag(char* buf, int len) {
|
2006-08-21 22:25:21 +00:00
|
|
|
int c;
|
2006-05-25 05:22:10 +00:00
|
|
|
while (1) {
|
2006-09-27 16:50:47 +00:00
|
|
|
c = f->_getc();
|
2006-08-21 22:25:21 +00:00
|
|
|
if (c == EOF) return true;
|
|
|
|
if (c == '<') {
|
2006-09-27 16:50:47 +00:00
|
|
|
f->_ungetc(c);
|
2006-08-21 22:25:21 +00:00
|
|
|
*buf = 0;
|
|
|
|
return false;
|
2006-06-13 20:27:35 +00:00
|
|
|
}
|
2006-08-23 03:34:30 +00:00
|
|
|
if (--len > 0) {
|
|
|
|
*buf++ = c;
|
|
|
|
}
|
2006-05-25 05:22:10 +00:00
|
|
|
}
|
2006-08-21 22:25:21 +00:00
|
|
|
}
|
2006-05-25 05:22:10 +00:00
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// Scan something, either tag or text.
|
|
|
|
// Strip whitespace at start and end.
|
|
|
|
// Return true iff reached EOF
|
|
|
|
//
|
2006-08-23 03:34:30 +00:00
|
|
|
bool XML_PARSER::get(char* buf, int len, bool& is_tag) {
|
2006-08-21 22:25:21 +00:00
|
|
|
bool eof;
|
|
|
|
int c;
|
|
|
|
|
|
|
|
eof = scan_nonws(c);
|
|
|
|
if (eof) return true;
|
|
|
|
if (c == '<') {
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = scan_tag(buf, len);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return true;
|
|
|
|
is_tag = true;
|
|
|
|
} else {
|
|
|
|
buf[0] = c;
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = copy_until_tag(buf+1, len-1);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return true;
|
|
|
|
is_tag = false;
|
|
|
|
}
|
|
|
|
strip_whitespace(buf);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We just parsed "parsed_tag".
|
|
|
|
// If it matches "start_tag", and is followed by a string
|
|
|
|
// and by the matching close tag, return the string in "buf",
|
|
|
|
// and return true.
|
|
|
|
//
|
2006-08-23 03:34:30 +00:00
|
|
|
bool XML_PARSER::parse_str(
|
2006-11-03 19:24:21 +00:00
|
|
|
char* parsed_tag, const char* start_tag, char* buf, int len
|
2006-08-23 03:34:30 +00:00
|
|
|
) {
|
2006-08-21 22:25:21 +00:00
|
|
|
bool is_tag, eof;
|
2006-10-04 17:01:36 +00:00
|
|
|
char end_tag[256], tag[256], tmp[64000];
|
2006-05-25 05:22:10 +00:00
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// handle the archaic form <tag/>, which means empty string
|
2006-05-25 05:22:10 +00:00
|
|
|
//
|
2006-08-21 22:25:21 +00:00
|
|
|
strcpy(tag, start_tag);
|
|
|
|
strcat(tag, "/");
|
|
|
|
if (!strcmp(parsed_tag, tag)) {
|
|
|
|
strcpy(buf, "");
|
2006-05-25 05:22:10 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-10-04 17:01:36 +00:00
|
|
|
// check for start tag
|
|
|
|
//
|
2006-08-21 22:25:21 +00:00
|
|
|
if (strcmp(parsed_tag, start_tag)) return false;
|
|
|
|
|
|
|
|
end_tag[0] = '/';
|
|
|
|
strcpy(end_tag+1, start_tag);
|
2006-10-04 17:01:36 +00:00
|
|
|
|
|
|
|
// get text after start tag
|
|
|
|
//
|
|
|
|
eof = get(tmp, 64000, is_tag);
|
|
|
|
if (eof) return false;
|
|
|
|
|
|
|
|
// if it's the end tag, return empty string
|
|
|
|
//
|
|
|
|
if (is_tag) {
|
2006-10-04 21:08:37 +00:00
|
|
|
if (strcmp(tmp, end_tag)) {
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
strcpy(buf, "");
|
|
|
|
return true;
|
|
|
|
}
|
2006-10-04 17:01:36 +00:00
|
|
|
}
|
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(tag, sizeof(tag), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (!is_tag) return false;
|
|
|
|
if (strcmp(tag, end_tag)) return false;
|
2006-10-04 17:01:36 +00:00
|
|
|
strlcpy(buf, tmp, len);
|
2006-08-21 22:25:21 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-09-10 03:40:36 +00:00
|
|
|
bool XML_PARSER::parse_string(
|
2006-11-03 19:24:21 +00:00
|
|
|
char* parsed_tag, const char* start_tag, string& str
|
2006-09-10 03:40:36 +00:00
|
|
|
) {
|
|
|
|
char buf[8192];
|
|
|
|
bool flag = parse_str(parsed_tag, start_tag, buf, sizeof(buf));
|
|
|
|
if (!flag) return false;
|
|
|
|
str = buf;
|
2006-09-12 05:04:59 +00:00
|
|
|
return true;
|
2006-09-10 03:40:36 +00:00
|
|
|
}
|
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// Same, for integers
|
|
|
|
//
|
2006-11-03 19:24:21 +00:00
|
|
|
bool XML_PARSER::parse_int(char* parsed_tag, const char* start_tag, int& i) {
|
2006-08-21 22:25:21 +00:00
|
|
|
char buf[256], *end;
|
|
|
|
bool is_tag, eof;
|
|
|
|
char end_tag[256], tag[256];
|
|
|
|
|
|
|
|
if (strcmp(parsed_tag, start_tag)) return false;
|
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(buf, sizeof(buf), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (is_tag) return false;
|
|
|
|
i = strtol(buf, &end, 0);
|
|
|
|
if (end != buf+strlen(buf)) return false;
|
|
|
|
|
|
|
|
end_tag[0] = '/';
|
|
|
|
strcpy(end_tag+1, start_tag);
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(tag, sizeof(tag), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (!is_tag) return false;
|
|
|
|
if (strcmp(tag, end_tag)) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Same, for doubles
|
|
|
|
//
|
2006-11-03 19:24:21 +00:00
|
|
|
bool XML_PARSER::parse_double(char* parsed_tag, const char* start_tag, double& x) {
|
2006-08-21 22:25:21 +00:00
|
|
|
char buf[256], *end;
|
|
|
|
bool is_tag, eof;
|
|
|
|
char end_tag[256], tag[256];
|
|
|
|
|
|
|
|
if (strcmp(parsed_tag, start_tag)) return false;
|
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(buf, sizeof(buf), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (is_tag) return false;
|
|
|
|
x = strtod(buf, &end);
|
|
|
|
if (end != buf+strlen(buf)) return false;
|
|
|
|
|
|
|
|
end_tag[0] = '/';
|
|
|
|
strcpy(end_tag+1, start_tag);
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(tag, sizeof(tag), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (!is_tag) return false;
|
|
|
|
if (strcmp(tag, end_tag)) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Same, for bools
|
|
|
|
//
|
2006-11-03 19:24:21 +00:00
|
|
|
bool XML_PARSER::parse_bool(char* parsed_tag, const char* start_tag, bool& b) {
|
2006-08-21 22:25:21 +00:00
|
|
|
char buf[256], *end;
|
|
|
|
bool is_tag, eof;
|
|
|
|
char end_tag[256], tag[256];
|
|
|
|
|
|
|
|
// handle the archaic form <tag/>, which means true
|
2006-05-25 05:22:10 +00:00
|
|
|
//
|
2006-08-21 22:25:21 +00:00
|
|
|
strcpy(tag, start_tag);
|
|
|
|
strcat(tag, "/");
|
|
|
|
if (!strcmp(parsed_tag, tag)) {
|
|
|
|
b = true;
|
2006-05-25 05:22:10 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// otherwise look for something of the form <tag>int</tag>
|
2006-05-25 05:22:10 +00:00
|
|
|
//
|
2006-08-21 22:25:21 +00:00
|
|
|
if (strcmp(parsed_tag, start_tag)) return false;
|
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(buf, sizeof(buf), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (is_tag) return false;
|
2006-09-07 18:19:25 +00:00
|
|
|
b = (strtol(buf, &end, 0) != 0);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (end != buf+strlen(buf)) return false;
|
|
|
|
|
|
|
|
end_tag[0] = '/';
|
|
|
|
strcpy(end_tag+1, start_tag);
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(tag, sizeof(tag), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof) return false;
|
|
|
|
if (!is_tag) return false;
|
|
|
|
if (strcmp(tag, end_tag)) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-09-10 03:40:36 +00:00
|
|
|
// parse a start tag (optionally preceded by <?xml>)
|
|
|
|
//
|
2006-11-03 19:24:21 +00:00
|
|
|
bool XML_PARSER::parse_start(const char* start_tag) {
|
2006-08-21 22:25:21 +00:00
|
|
|
char tag[256];
|
|
|
|
bool eof, is_tag;
|
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(tag, sizeof(tag), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof || !is_tag ) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (strstr(tag, "?xml")) {
|
2006-08-23 03:34:30 +00:00
|
|
|
eof = get(tag, sizeof(tag), is_tag);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (eof || !is_tag ) {
|
|
|
|
return false;
|
2006-05-26 19:40:11 +00:00
|
|
|
}
|
2006-05-25 05:22:10 +00:00
|
|
|
}
|
2006-08-21 22:25:21 +00:00
|
|
|
if (strcmp(tag, start_tag)) {
|
|
|
|
return false;
|
2006-05-25 05:22:10 +00:00
|
|
|
}
|
2006-08-21 22:25:21 +00:00
|
|
|
return true;
|
2006-05-25 05:22:10 +00:00
|
|
|
}
|
|
|
|
|
2006-09-12 18:18:15 +00:00
|
|
|
// copy everything up to (but not including) the given end tag.
|
|
|
|
// The copied text may include XML tags.
|
|
|
|
// strips whitespace.
|
|
|
|
//
|
|
|
|
int XML_PARSER::element_contents(const char* end_tag, char* buf, int buflen) {
|
|
|
|
int n=0;
|
|
|
|
int retval=0;
|
|
|
|
while (1) {
|
|
|
|
if (n == buflen-1) {
|
|
|
|
retval = ERR_XML_PARSE;
|
|
|
|
break;
|
|
|
|
}
|
2006-09-27 16:50:47 +00:00
|
|
|
int c = f->_getc();
|
2006-09-12 18:18:15 +00:00
|
|
|
if (c == EOF) {
|
|
|
|
retval = ERR_XML_PARSE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
buf[n++] = c;
|
|
|
|
buf[n] = 0;
|
|
|
|
char* p = strstr(buf, end_tag);
|
|
|
|
if (p) {
|
|
|
|
*p = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buf[n] = 0;
|
|
|
|
strip_whitespace(buf);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// sample use is shown below
|
2006-05-25 05:22:10 +00:00
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
#if 0
|
|
|
|
void parse(FILE* f) {
|
|
|
|
char tag[256];
|
|
|
|
bool is_tag, flag;
|
2006-08-23 03:34:30 +00:00
|
|
|
MIOFILE mf;
|
2007-02-24 20:10:06 +00:00
|
|
|
XML_PARSER xp(&mf);
|
2006-08-21 22:25:21 +00:00
|
|
|
char name[256];
|
|
|
|
int val;
|
|
|
|
double x;
|
2006-05-25 05:22:10 +00:00
|
|
|
|
2006-08-23 03:34:30 +00:00
|
|
|
mf.init_file(f);
|
2006-08-21 22:25:21 +00:00
|
|
|
if (!xp.parse_start("blah")) {
|
|
|
|
printf("missing start tag\n");
|
|
|
|
return;
|
|
|
|
}
|
2006-08-23 03:34:30 +00:00
|
|
|
while (!xp.get(tag, sizeof(tag), is_tag)) {
|
2006-08-21 22:25:21 +00:00
|
|
|
if (!is_tag) {
|
|
|
|
printf("unexpected text: %s\n", tag);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(tag, "/blah")) {
|
|
|
|
printf("success\n");
|
|
|
|
return;
|
2006-08-23 03:34:30 +00:00
|
|
|
} else if (xp.parse_str(tag, "str", name, sizeof(name))) {
|
2006-08-21 22:25:21 +00:00
|
|
|
printf("got str: %s\n", name);
|
|
|
|
} else if (xp.parse_int(tag, "int", val)) {
|
|
|
|
printf("got int: %d\n", val);
|
|
|
|
} else if (xp.parse_double(tag, "double", x)) {
|
|
|
|
printf("got double: %f\n", x);
|
|
|
|
} else if (xp.parse_bool(tag, "bool", flag)) {
|
|
|
|
printf("got bool: %d\n", flag);
|
|
|
|
} else {
|
|
|
|
printf("unparsed tag: %s\n", tag);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
printf("unexpected EOF\n");
|
2006-05-25 05:22:10 +00:00
|
|
|
}
|
|
|
|
|
2006-08-21 22:25:21 +00:00
|
|
|
// Example entry point: run parse() on foo.xml in the current directory.
// Returns 1 if the file can't be opened, 0 otherwise.
//
int main() {
    FILE* f = fopen("foo.xml", "r");
    if (!f) {
        printf("can't open foo.xml\n");
        return 1;
    }
    parse(f);
    fclose(f);
    return 0;
}
|
|
|
|
#endif
|
2005-01-02 18:29:53 +00:00
|
|
|
// Revision-identification string for this file.
// "$Id$" looks like an RCS/CVS/SVN keyword placeholder, presumably
// expanded by the version-control system on checkout - TODO confirm.
const char *BOINC_RCSID_3f3de9eb18 = "$Id$";
|