2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2023-02-12 18:24:56 +00:00
|
|
|
// Copyright (C) 2023 University of California
|
2004-11-14 08:29:32 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2003-10-03 06:46:22 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2002-04-30 22:22:54 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2017-04-08 06:54:49 +00:00
|
|
|
#ifndef BOINC_PARSE_H
|
|
|
|
#define BOINC_PARSE_H
|
2004-06-12 18:44:53 +00:00
|
|
|
|
2009-02-26 00:23:23 +00:00
|
|
|
#include <cstdio>
|
2008-04-06 03:15:36 +00:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2008-08-05 15:20:02 +00:00
|
|
|
#include <errno.h>
|
2023-04-28 10:14:45 +00:00
|
|
|
#ifdef __APPLE__
|
|
|
|
#include <xlocale.h>
|
|
|
|
#endif
|
2003-10-03 06:46:22 +00:00
|
|
|
|
2006-08-22 21:52:44 +00:00
|
|
|
#include "miofile.h"
|
2012-11-07 08:08:18 +00:00
|
|
|
#include "error_numbers.h"
|
2011-08-23 12:10:38 +00:00
|
|
|
#include "str_util.h"
|
2023-04-11 03:13:09 +00:00
|
|
|
|
|
|
|
extern bool boinc_is_finite(double);
|
|
|
|
// avoid including util.h (kludge)
|
2006-08-22 21:52:44 +00:00
|
|
|
|
2011-10-03 21:43:34 +00:00
|
|
|
// see parse_test.cpp for example usage of XML_PARSER
|
|
|
|
|
2012-11-07 08:08:18 +00:00
|
|
|
// Result codes returned by the scanning functions of XML_PARSER
// (copy_until_tag(), get_aux(), scan_tag()).
//
// XML_PARSE_COMMENT: a comment was scanned; get_aux() skips it and keeps reading
#define XML_PARSE_COMMENT 1
|
|
|
|
// XML_PARSE_EOF: input ended (the reader returned EOF or a NUL char)
#define XML_PARSE_EOF 2
|
|
|
|
// XML_PARSE_CDATA: a <![CDATA[ section was scanned
#define XML_PARSE_CDATA 3
|
|
|
|
// XML_PARSE_TAG: a tag was scanned up to its closing >
#define XML_PARSE_TAG 4
|
|
|
|
// XML_PARSE_DATA: text between tags was copied
#define XML_PARSE_DATA 5
|
2013-02-27 21:03:44 +00:00
|
|
|
// XML_PARSE_OVERFLOW: the item didn't fit in the caller's buffer
#define XML_PARSE_OVERFLOW 6
|
|
|
|
|
2017-06-09 22:12:40 +00:00
|
|
|
// size of XML_PARSER::parsed_tag
#define TAG_BUF_LEN 4096
|
|
|
|
// max tag length
|
|
|
|
#define ELEMENT_BUF_LEN 65536
|
|
|
|
// max element length (matches BLOB_SIZE, max size of XML fields in DB)
|
2012-11-07 08:08:18 +00:00
|
|
|
|
|
|
|
struct XML_PARSER {
|
2007-06-11 21:30:26 +00:00
|
|
|
int scan_comment();
|
2010-01-13 05:32:11 +00:00
|
|
|
int scan_cdata(char*, int);
|
2017-06-09 22:12:40 +00:00
|
|
|
char parsed_tag[TAG_BUF_LEN];
|
2011-08-10 17:11:08 +00:00
|
|
|
bool is_tag;
|
2009-02-25 19:18:41 +00:00
|
|
|
MIOFILE* f;
|
2006-08-22 21:52:44 +00:00
|
|
|
XML_PARSER(MIOFILE*);
|
2011-09-18 21:06:49 +00:00
|
|
|
void init(MIOFILE* mf) {
|
|
|
|
f = mf;
|
|
|
|
}
|
2012-11-07 08:08:18 +00:00
|
|
|
// read and copy text to buf; stop when find a <;
|
|
|
|
// ungetc() that so we read it again
|
2013-02-27 21:03:44 +00:00
|
|
|
// Return XML_PARSE_DATA if successful
|
2012-11-07 08:08:18 +00:00
|
|
|
//
|
2013-02-27 21:03:44 +00:00
|
|
|
inline int copy_until_tag(char* buf, int len) {
|
2012-11-07 08:08:18 +00:00
|
|
|
int c;
|
|
|
|
while (1) {
|
|
|
|
c = f->_getc();
|
2013-02-27 21:03:44 +00:00
|
|
|
if (!c || c == EOF) return XML_PARSE_EOF;
|
2012-11-07 08:08:18 +00:00
|
|
|
if (c == '<') {
|
|
|
|
f->_ungetc(c);
|
|
|
|
*buf = 0;
|
2013-02-27 21:03:44 +00:00
|
|
|
return XML_PARSE_DATA;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
2013-02-27 21:03:44 +00:00
|
|
|
if (--len <= 0) {
|
|
|
|
return XML_PARSE_OVERFLOW;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
2018-08-02 19:18:15 +00:00
|
|
|
*buf++ = (char)c;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-27 21:03:44 +00:00
|
|
|
// return true if EOF or error
|
|
|
|
//
|
2012-11-07 08:08:18 +00:00
|
|
|
inline bool get(
|
|
|
|
char* buf, int len, bool& _is_tag, char* attr_buf=0, int attr_len=0
|
|
|
|
) {
|
|
|
|
switch (get_aux(buf, len, attr_buf, attr_len)) {
|
2013-02-27 21:03:44 +00:00
|
|
|
case XML_PARSE_EOF:
|
|
|
|
case XML_PARSE_OVERFLOW:
|
|
|
|
return true;
|
2012-11-07 08:08:18 +00:00
|
|
|
case XML_PARSE_TAG:
|
|
|
|
_is_tag = true;
|
|
|
|
break;
|
|
|
|
case XML_PARSE_DATA:
|
|
|
|
case XML_PARSE_CDATA:
|
|
|
|
default:
|
|
|
|
_is_tag = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-08-10 17:11:08 +00:00
|
|
|
inline bool get_tag(char* ab=0, int al=0) {
|
2013-02-27 21:03:44 +00:00
|
|
|
if (get(parsed_tag, sizeof(parsed_tag), is_tag, ab, al)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (strlen(parsed_tag) > TAG_BUF_LEN-10) {
|
2013-03-06 06:15:30 +00:00
|
|
|
parsed_tag[TAG_BUF_LEN-10] = 0;
|
2013-02-27 21:03:44 +00:00
|
|
|
}
|
|
|
|
return false;
|
2011-08-10 17:11:08 +00:00
|
|
|
}
|
|
|
|
inline bool match_tag(const char* tag) {
|
|
|
|
return !strcmp(parsed_tag, tag);
|
|
|
|
}
|
2012-11-07 08:08:18 +00:00
|
|
|
|
|
|
|
// read until find non-whitespace char.
|
|
|
|
// Return the char in the reference param
|
|
|
|
// Return true iff reached EOF
|
|
|
|
//
|
|
|
|
inline bool scan_nonws(int& first_char) {
|
2013-01-17 03:42:48 +00:00
|
|
|
int c;
|
2012-11-07 08:08:18 +00:00
|
|
|
while (1) {
|
|
|
|
c = f->_getc();
|
2013-02-27 21:03:44 +00:00
|
|
|
if (!c || c == EOF) return true;
|
2013-03-01 04:18:26 +00:00
|
|
|
if (isascii(c) && isspace(c)) continue;
|
2012-11-07 08:08:18 +00:00
|
|
|
first_char = c;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Scan something, either tag or text.
|
2013-03-30 05:36:53 +00:00
|
|
|
// Strip whitespace at start and end
|
|
|
|
// (however, the supplied buffer must accommodate this white space).
|
|
|
|
// Ignore comments.
|
2012-11-07 08:08:18 +00:00
|
|
|
// Return true iff reached EOF
|
|
|
|
//
|
|
|
|
inline int get_aux(
|
|
|
|
char* buf, int len, char* attr_buf, int attr_len
|
|
|
|
) {
|
|
|
|
bool eof;
|
|
|
|
int c, retval;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
eof = scan_nonws(c);
|
|
|
|
if (eof) return XML_PARSE_EOF;
|
|
|
|
if (c == '<') {
|
|
|
|
retval = scan_tag(buf, len, attr_buf, attr_len);
|
|
|
|
if (retval == XML_PARSE_EOF) return retval;
|
2013-02-27 21:03:44 +00:00
|
|
|
if (retval == XML_PARSE_OVERFLOW) return retval;
|
2012-11-07 08:08:18 +00:00
|
|
|
if (retval == XML_PARSE_COMMENT) continue;
|
|
|
|
} else {
|
2018-08-02 19:18:15 +00:00
|
|
|
buf[0] = (char)c;
|
2013-02-27 21:03:44 +00:00
|
|
|
retval = copy_until_tag(buf+1, len-1);
|
|
|
|
if (retval != XML_PARSE_DATA) return retval;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
|
|
|
strip_whitespace(buf);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// we just read a <; read until we find a >.
|
|
|
|
// Given <tag [attr=val attr=val] [/]>:
|
|
|
|
// - copy tag (or tag/) to buf
|
|
|
|
// - copy "attr=val attr=val" to attr_buf
|
|
|
|
//
|
|
|
|
// Return either
|
|
|
|
// XML_PARSE_TAG
|
|
|
|
// XML_PARSE_COMMENT
|
|
|
|
// XML_PARSE_EOF
|
|
|
|
// XML_PARSE_CDATA
|
|
|
|
//
|
|
|
|
inline int scan_tag(
|
|
|
|
char* buf, int _tag_len, char* attr_buf=0, int attr_len=0
|
|
|
|
) {
|
|
|
|
int c;
|
|
|
|
char* buf_start = buf;
|
|
|
|
bool found_space = false;
|
|
|
|
int tag_len = _tag_len;
|
|
|
|
|
|
|
|
for (int i=0; ; i++) {
|
|
|
|
c = f->_getc();
|
2013-02-27 21:03:44 +00:00
|
|
|
if (!c || c == EOF) return XML_PARSE_EOF;
|
2012-11-07 08:08:18 +00:00
|
|
|
if (c == '>') {
|
|
|
|
*buf = 0;
|
|
|
|
if (attr_buf) *attr_buf = 0;
|
|
|
|
return XML_PARSE_TAG;
|
|
|
|
}
|
2013-03-01 04:18:26 +00:00
|
|
|
if (isascii(c) && isspace(c)) {
|
2012-11-07 08:08:18 +00:00
|
|
|
if (found_space && attr_buf) {
|
|
|
|
if (--attr_len > 0) {
|
2018-03-01 22:16:53 +00:00
|
|
|
*attr_buf++ = (char)c;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
found_space = true;
|
|
|
|
} else if (c == '/') {
|
|
|
|
if (--tag_len > 0) {
|
2018-03-01 22:16:53 +00:00
|
|
|
*buf++ = (char)c;
|
2013-02-27 21:03:44 +00:00
|
|
|
} else {
|
|
|
|
return XML_PARSE_OVERFLOW;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (found_space) {
|
|
|
|
if (attr_buf) {
|
|
|
|
if (--attr_len > 0) {
|
2018-03-01 22:16:53 +00:00
|
|
|
*attr_buf++ = (char)c;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (--tag_len > 0) {
|
2018-03-01 22:16:53 +00:00
|
|
|
*buf++ = (char)c;
|
2013-02-27 21:03:44 +00:00
|
|
|
} else {
|
|
|
|
return XML_PARSE_OVERFLOW;
|
2012-11-07 08:08:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// check for comment start
|
|
|
|
//
|
|
|
|
if (i==2 && !strncmp(buf_start, "!--", 3)) {
|
|
|
|
return scan_comment();
|
|
|
|
}
|
|
|
|
if (i==7 && !strncmp(buf_start, "![CDATA[", 8)) {
|
|
|
|
return scan_cdata(buf_start, tag_len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// copy everything up to (but not including) the given end tag.
|
|
|
|
// The copied text may include XML tags.
|
2017-06-14 05:35:17 +00:00
|
|
|
// strips start/end whitespace.
|
2012-11-07 08:08:18 +00:00
|
|
|
//
|
|
|
|
inline int element_contents(const char* end_tag, char* buf, int buflen) {
|
|
|
|
int n=0;
|
|
|
|
int retval=0;
|
|
|
|
while (1) {
|
|
|
|
if (n == buflen-1) {
|
|
|
|
retval = ERR_XML_PARSE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
int c = f->_getc();
|
2013-02-27 21:03:44 +00:00
|
|
|
if (!c || c == EOF) {
|
2012-11-07 08:08:18 +00:00
|
|
|
retval = ERR_XML_PARSE;
|
|
|
|
break;
|
|
|
|
}
|
2018-03-01 22:16:53 +00:00
|
|
|
buf[n++] = (char)c;
|
2012-11-07 08:08:18 +00:00
|
|
|
buf[n] = 0;
|
|
|
|
char* p = strstr(buf, end_tag);
|
|
|
|
if (p) {
|
|
|
|
*p = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buf[n] = 0;
|
|
|
|
strip_whitespace(buf);
|
|
|
|
return retval;
|
|
|
|
}
|
2013-05-27 18:45:10 +00:00
|
|
|
bool parse_str_aux(const char*, char*, int);
|
2012-11-07 08:08:18 +00:00
|
|
|
|
2013-05-27 18:45:10 +00:00
|
|
|
// interface starts here
|
|
|
|
//
|
2006-11-03 19:24:21 +00:00
|
|
|
bool parse_start(const char*);
|
2011-08-10 17:11:08 +00:00
|
|
|
bool parse_str(const char*, char*, int);
|
|
|
|
bool parse_string(const char*, std::string&);
|
|
|
|
bool parse_int(const char*, int&);
|
2015-07-23 17:11:08 +00:00
|
|
|
bool parse_long(const char*, long&);
|
2011-08-10 17:11:08 +00:00
|
|
|
bool parse_double(const char*, double&);
|
2011-09-15 04:24:40 +00:00
|
|
|
bool parse_ulong(const char*, unsigned long&);
|
|
|
|
bool parse_ulonglong(const char*, unsigned long long&);
|
2011-08-10 17:11:08 +00:00
|
|
|
bool parse_bool(const char*, bool&);
|
2011-08-27 16:52:04 +00:00
|
|
|
int copy_element(std::string&);
|
2007-07-03 21:55:50 +00:00
|
|
|
void skip_unexpected(const char*, bool verbose, const char*);
|
2011-08-11 06:17:33 +00:00
|
|
|
void skip_unexpected(bool verbose=false, const char* msg="") {
|
2011-08-10 17:11:08 +00:00
|
|
|
skip_unexpected(parsed_tag, verbose, msg);
|
|
|
|
}
|
2006-08-21 22:25:21 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/////////////// START DEPRECATED XML PARSER
|
|
|
|
// Deprecated because it makes assumptions about
|
|
|
|
// the format of the XML being parsed
|
|
|
|
///////////////
|
|
|
|
|
2006-04-18 17:55:14 +00:00
|
|
|
// Substring test: true iff tag occurs anywhere in buf.
//
inline bool match_tag(const char* buf, const char* tag) {
    return strstr(buf, tag) != NULL;
}
|
|
|
|
|
|
|
|
// Same substring test, for a std::string line.
//
inline bool match_tag(const std::string &s, const char* tag) {
    const char* line = s.c_str();
    return strstr(line, tag) != NULL;
}
|
|
|
|
|
2020-10-22 18:24:08 +00:00
|
|
|
extern unsigned long long boinc_strtoull(const char *, char **, int);
|
2011-08-23 12:10:38 +00:00
|
|
|
|
2006-04-18 17:55:14 +00:00
|
|
|
// parse an integer of the form <tag>1234</tag>
// buf: the text to search; tag: the start tag, e.g. "<tag>".
// return true if it's there, and store the value in x.
// Return false (leaving x unchanged) if the tag isn't found
// or the number doesn't fit in an int.
// Note: this doesn't check for the end tag,
// nor that any digits actually follow the start tag.
//
inline bool parse_int(const char* buf, const char* tag, int& x) {
    const char* p = strstr(buf, tag);
    if (!p) return false;
    errno = 0;
    long v = strtol(p+strlen(tag), 0, 0);    // this parses 0xabcd correctly
    if (errno) return false;

    // where long is wider than int, strtol() accepts values
    // that an int can't hold; reject those
    // rather than storing a truncated value
    //
    int y = (int)v;
    if ((long)y != v) return false;
    x = y;
    return true;
}
|
|
|
|
|
|
|
|
// Same, for doubles
|
|
|
|
//
|
|
|
|
inline bool parse_double(const char* buf, const char* tag, double& x) {
|
|
|
|
double y;
|
|
|
|
const char* p = strstr(buf, tag);
|
|
|
|
if (!p) return false;
|
2011-09-15 04:24:40 +00:00
|
|
|
errno = 0;
|
2023-04-28 10:14:45 +00:00
|
|
|
#ifdef __APPLE__
|
|
|
|
// MacOS 13.3.1 apparently broke per-thread locale uselocale()
|
|
|
|
y = strtod_l(p+strlen(tag), NULL, LC_C_LOCALE);
|
|
|
|
#else
|
2011-09-15 04:24:40 +00:00
|
|
|
y = strtod(p+strlen(tag), NULL);
|
2023-04-28 10:14:45 +00:00
|
|
|
#endif
|
2011-09-15 04:24:40 +00:00
|
|
|
if (errno) return false;
|
2009-06-16 19:22:11 +00:00
|
|
|
if (!boinc_is_finite(y)) {
|
|
|
|
return false;
|
2006-04-18 17:55:14 +00:00
|
|
|
}
|
2009-06-16 19:22:11 +00:00
|
|
|
x = y;
|
|
|
|
return true;
|
2006-04-18 17:55:14 +00:00
|
|
|
}
|
|
|
|
|
2003-06-17 01:03:45 +00:00
|
|
|
// Remaining entry points of the deprecated parser.
// Only the declarations appear in this header.
// NOTE(review): presumably these take (buf, tag, dest) like
// parse_int()/parse_double() above - confirm against the definitions.
extern bool parse(char* , char* );
|
|
|
|
// char* destination plus its length
extern bool parse_str(const char*, const char*, char*, int);
|
2004-06-30 18:17:21 +00:00
|
|
|
// std::string destination
extern bool parse_str(const char* buf, const char* tag, std::string& dest);
|
2003-06-17 18:43:13 +00:00
|
|
|
// out/len: destination buffer and its length
extern void parse_attr(const char* buf, const char* attrname, char* out, int len);
|
2005-08-12 18:31:47 +00:00
|
|
|
extern bool parse_bool(const char*, const char*, bool&);
|
2006-08-21 22:25:21 +00:00
|
|
|
|
|
|
|
/////////////// END DEPRECATED XML PARSER
|
|
|
|
|
2007-07-10 23:42:05 +00:00
|
|
|
// Free-standing helpers for copying and editing XML text.
// Only the declarations appear in this header; what each one does
// has to be checked against its definition.
extern int copy_stream(FILE* in, FILE* out);
|
2006-03-02 22:51:41 +00:00
|
|
|
extern int strcatdup(char*& p, char* buf);
|
2003-06-17 01:03:45 +00:00
|
|
|
// result is passed back through *pp
// NOTE(review): presumably allocated here and owned by the caller - confirm
extern int dup_element_contents(FILE* in, const char* end_tag, char** pp);
|
2008-09-04 08:33:21 +00:00
|
|
|
// result is passed back through *pp (see note above)
extern int dup_element(FILE* in, const char* end_tag, char** pp);
|
2017-08-25 22:43:21 +00:00
|
|
|
// variant writing to a caller-supplied buffer p of size len
extern int copy_element_contents(FILE* in, const char* end_tag, char* p, size_t len);
|
2004-06-30 18:17:21 +00:00
|
|
|
// variant writing to a std::string
extern int copy_element_contents(FILE* in, const char* end_tag, std::string&);
|
2004-09-13 18:05:54 +00:00
|
|
|
// NOTE(review): buf is modified in place and no buffer size is passed -
// confirm how callers guarantee enough room for the replacement
extern void replace_element_contents(
|
2005-02-16 23:17:43 +00:00
|
|
|
char* buf, const char* start, const char* end, const char* replacement
|
2004-09-13 18:05:54 +00:00
|
|
|
);
|
2005-02-16 23:17:43 +00:00
|
|
|
extern bool remove_element(char* buf, const char* start, const char* end);
|
|
|
|
// NOTE(review): no buffer size is passed - confirm behavior when
// neww is longer than old
extern bool str_replace(char* str, const char* old, const char* neww);
|
2003-05-20 00:03:39 +00:00
|
|
|
// "in" is passed by reference
// NOTE(review): presumably advanced past the text consumed - confirm
extern char* sgets(char* buf, int len, char* &in);
|
2011-05-25 16:40:19 +00:00
|
|
|
// (input, output, length of output buffer) - TODO confirm argument roles
extern void non_ascii_escape(const char*, char*, int len);
|
2008-08-13 17:27:13 +00:00
|
|
|
// (input, output, length of output buffer) - TODO confirm argument roles
extern void xml_escape(const char*, char*, int len);
|
2009-08-07 18:16:21 +00:00
|
|
|
// in-place variants for std::string and char*
extern void xml_unescape(std::string&);
|
2011-09-14 17:58:53 +00:00
|
|
|
extern void xml_unescape(char*);
|
2013-06-04 03:24:48 +00:00
|
|
|
extern void extract_venue(const char*, const char*, char*, int len);
|
2007-06-08 07:55:27 +00:00
|
|
|
extern int skip_unrecognized(char* buf, MIOFILE&);
|
2004-06-12 18:44:53 +00:00
|
|
|
|
|
|
|
#endif
|